From 5a7e2509f5e5321863cea48708a9991d82209e84 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 2 Mar 2022 15:24:42 +0200 Subject: [PATCH 01/37] fixes some substitution bugs --- stimela/backends/native.py | 4 ++++ stimela/cargo/cab/wsclean.yaml | 5 +++++ stimela/kitchen/recipe.py | 8 +++++--- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/stimela/backends/native.py b/stimela/backends/native.py index 13735a4a..d7de5f6a 100644 --- a/stimela/backends/native.py +++ b/stimela/backends/native.py @@ -69,10 +69,14 @@ def run_callable(modulename: str, funcname: str, cab: Cab, log, subst: Optional[ """ # import module and get function object + path0 = sys.path.copy() + sys.path.append('.') try: mod = importlib.import_module(modulename) except ImportError as exc: raise StimelaCabRuntimeError(f"can't import {modulename}: {exc}", log=log) + finally: + sys.path = path0 func = getattr(mod, funcname, None) diff --git a/stimela/cargo/cab/wsclean.yaml b/stimela/cargo/cab/wsclean.yaml index b86aaab5..e0ceb667 100644 --- a/stimela/cargo/cab/wsclean.yaml +++ b/stimela/cargo/cab/wsclean.yaml @@ -28,6 +28,11 @@ outputs: dtype: List[File] implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-image.fits" must_exist: false + restored_timeint: + info: Restored images per time interval + dtype: List[File] + implicit: "{current.prefix}-t[0-9][0-9][0-9][0-9]-image.fits" + must_exist: false residual: dtype: List[File] implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-residual.fits" diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 2da62e9b..b3e0fdf6 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -254,11 +254,9 @@ def run(self, params=None, subst=None, batch=None): self.log.debug(f"validating outputs") validated = False - # insert output values into params for re-substitution and re-validation - output_params = {name: value for name, value in params.items() if name in self.cargo.outputs} try: - params = self.cargo.validate_outputs(output_params, loosely=self.skip, subst=subst) + params = self.cargo.validate_outputs(params, loosely=self.skip, subst=subst) validated = True except ScabhaBaseException as exc: level = logging.WARNING if self.skip else logging.ERROR @@ -277,6 +275,9 @@ def run(self, params=None, subst=None, batch=None): self.log_summary(level, "failed outputs", color="WARNING") raise + if subst is not None: + subst.current._merge_(params) + if validated: self.log_summary(logging.DEBUG, "validated outputs") @@ -739,6 +740,7 @@ def validate_inputs(self, params: Dict[str, Any], subst: Optional[SubstitutionNS info = SubstitutionNS(fqname=self.fqname) subst._add_('info', info, nosubst=True) subst._add_('config', self.config, nosubst=True) + subst._add_('recipe', self.make_substitition_namespace(ns=self.assign)) # subst._add_('recipe', self.make_substitition_namespace(ns=self.assign)) # subst.recipe._merge_(params) From 95ea5f7e6338743238ddb2006db7ddabbe206acb Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 2 Mar 2022 17:21:43 +0200 Subject: [PATCH 02/37] fixing alias propagation --- stimela/kitchen/recipe.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index b3e0fdf6..ca26582a 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -546,13 +546,12 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): if schema.required and not have_step_param: existing_schema.required = True - ### OMS 16/02/2022: see https://github.com/caracal-pipeline/stimela2/issues/16 - ### this was misguided. If an aliased parameter is set in one of the steps, it should not become implicit, i.e. the user - ### should still be able to override it at recipe level. - - ## alias becomes implicit if any step parameter it refers to is defined (and it doesn't have its own default) - ## if have_step_param and alias_name not in self.defaults: - ## existing_schema.implicit = f"{step_label}.{step_param_name}" + ## if our alias doesn't have its own default set, and the step has something set, then we'll propagate the value + ## *from* the step to the recipe (for the first such step) + if have_step_param and alias_name not in self.defaults: + if alias_name not in self._alias_propagated_from_step: + self._alias_propagated_from_step[alias_name] = (step, step_param_name) + # all other steps will have the aliased value propagated *to* them self._alias_map[step_label, step_param_name] = alias_name self._alias_list.setdefault(alias_name, []).append((step, step_param_name)) @@ -594,6 +593,7 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): # collect aliases self._alias_map = OrderedDict() self._alias_list = OrderedDict() + self._alias_propagated_from_step = OrderedDict() # collect from inputs and outputs for io in self.inputs, self.outputs: @@ -685,7 +685,7 @@ def prevalidate(self, params: Optional[Dict[str, Any]], subst: Optional[Substitu # merge again subst.recipe._merge_(self.params) - # propagate aliases up to substeps + # propagate aliases up to/down from substeps for name, value in self.params.items(): self._propagate_parameter(name, value) @@ -772,13 +772,15 @@ def _link_steps(self): step.previous_step = steps[i-2] def _propagate_parameter(self, name, value): - ### OMS: not sure why I had this, why not propagae unresolveds? - ## if type(value) is not validate.Unresolved: + # check if aliased parameter is to be propagated down from a step + from_step, from_step_param_name = self._alias_propagated_from_step.get(name, (None, None)) + if from_step is not None: + if from_step_param_name in from_step.cargo.params: + self.params[name] = step.cargo.params[from_step_param_name] + + # propagate up to steps for step, step_param_name in self._alias_list.get(name, []): - if self.inputs_outputs[name].implicit: - if step_param_name in step.cargo.params: - self.params[name] = step.cargo.params[name] - else: + if step is not from_step: step.update_parameter(step_param_name, value) def update_parameter(self, name: str, value: Any): @@ -908,6 +910,7 @@ def loop_worker(inst, step, label, subst, count, iter_var): info.fqname = step.fqname stimelogging.update_file_logger(step.log, step.logopts, nesting=step.nesting, subst=subst, location=[step.fqname]) + inst.log.info(f"{'skipping' if step.skip else 'running'} step '{label}'") try: #step_params = step.run(subst=subst.copy(), batch=batch) # make a copy of the subst dict since recipe might modify step_params = step.run(subst=subst.copy()) # make a copy of the subst dict since recipe might modify @@ -926,10 +929,8 @@ def loop_worker(inst, step, label, subst, count, iter_var): for step1, step_param_name in inst._alias_list.get(name, []): if step1 is step and step_param_name in step_params: inst.params[name] = step_params[step_param_name] - # clear implicit setting - inst.outputs[name].implicit = None - - inst.log.info(f"{'skipping' if step.skip else 'running'} step '{label}'") + # # clear implicit setting + # inst.outputs[name].implicit = None loop_futures = [] From b595f8a150fa2788ec53fd72ef338940cbdd4127 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 2 Mar 2022 17:44:59 +0200 Subject: [PATCH 03/37] fixes for broken alias propagation (broken by PR #18) --- stimela/kitchen/recipe.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index ca26582a..790ed1c9 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -744,6 +744,9 @@ def validate_inputs(self, params: Dict[str, Any], subst: Optional[SubstitutionNS # subst._add_('recipe', self.make_substitition_namespace(ns=self.assign)) # subst.recipe._merge_(params) + for name, (from_step, from_param) in self._alias_propagated_from_step.items(): + if name not in params: + params[name] = from_step.cargo.params[from_param] params = Cargo.validate_inputs(self, params, subst=subst, loosely=loosely) From 52a89588daa3c44ba53f922a1a2a48eb48e90bea Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 2 Mar 2022 17:47:27 +0200 Subject: [PATCH 04/37] fixes breakage in python callable args --- stimela/backends/native.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stimela/backends/native.py b/stimela/backends/native.py index d7de5f6a..1e4618f2 100644 --- a/stimela/backends/native.py +++ b/stimela/backends/native.py @@ -94,7 +94,7 @@ def run_callable(modulename: str, funcname: str, cab: Cab, log, subst: Optional[ for key, schema in cab.inputs_outputs.items(): if not schema.policies.skip: if key in cab.params: - args[key] = cab.params + args[key] = cab.params[key] elif cab.get_schema_policy(schema, 'pass_missing_as_none'): args[key] = None From 003894b38c765770cd920ae455be123898f229f4 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Sat, 5 Mar 2022 19:05:28 +0200 Subject: [PATCH 05/37] cleaned up aliasing logic and implicit values --- DEVNOTES.md | 61 ++++++++++ setup.py | 2 +- stimela/commands/run.py | 3 +- stimela/kitchen/recipe.py | 230 ++++++++++++++++++----------------- stimela/tests/test_recipe.py | 40 +++++- 5 files changed, 221 insertions(+), 115 deletions(-) create mode 100644 DEVNOTES.md diff --git a/DEVNOTES.md b/DEVNOTES.md new file mode 100644 index 00000000..90a4fd23 --- /dev/null +++ b/DEVNOTES.md @@ -0,0 +1,61 @@ + + + +Problem with globs: +------------------- + +* prevalidate() calls validation of inputs and outputs. This is the only chance for skipped recipes to get aliases propagated down. I.e. if a sub-recipe has an output that is an alias of a sub-step, and the sub-recipe is skipped, this is the only chance to evaluate the glob (presumably, to existing outputs on disk). + +* validate_inputs() called before running a step. Currently this does not evaluate globs. + +* validate_outputs() called after running. Here we must re-expand the globs, since running the step may have changed the content. + +The current scheme where the glob is expanded and substituted as ``params[name] = [files]`` creates a problem. Expansion needs to happen at prevalidation. Then it needs to happen again at the output stage. So we can't replace the glob with a filelist. Somehow we must retain knowledge that this is a glob, otherwise we won't know to re-evaluate it. + +I tried creating a Glob class, but pydantic won't allow that, it expects a list of strings. So we need to retain this information externally (in Cargo, perhaps?) + +So: keep a copy of the original params dict, and re-evaluate all globs when asked to. + +Consider adding an explicit "glob:" prefix to glob values, so that we know not to re-evaluate explicitly specified files? + + + + +Problem with aliases: +--------------------- + +Let's clean up the logic. First, things to check during finalization: + +* an input alias may refer to multiple steps' inputs, but then their schema.dtype must be consistent + +* an output alias may refer to only one step's output, aliasing multiple outputs is nonsensical + +* an alias's schema is copied from the step, so implicit outputs are implicit for the recipe as well (but mark implicit=True, +since we don't want to copy the value literally until the step has validated it in its own {}-substitution context) + +During prevalidation (when parameter values are available), they must be propagated up or down + +* before prevalidating recipe parameters + + * go over all aliases -- if a recipe's parameter is not set: + + * check if a substep has a default or implicit -- propagate that value down to the recipe + + +* before prevalidating steps -- if a recipe has a parameter (or default) + + * propagate that value up to the step aliases, overriding their values + + * implicit output values can't be set, so this will be caught at this point + +* after prevalidating the steps -- if the recipe does not have a parameter value + + * check if a substep has a parameter value or default or implicit -- propagate that value down to the recipe + + * if multiple substeps are aliased, check that they're not set to conflicting values, throw an error if so. Otherwise propagate up. + + * make a list of parameters that have been propagated down + +After running the step + +* outputs may have changed compared to prevalidation. Propagate their value down to the recipe again (using the list compiled in prevalidation) diff --git a/setup.py b/setup.py index 05a920d1..24c96e74 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ requirements = ["pyyaml", "nose>=1.3.7", "future-fstrings", - "scabha @ git+https://github.com/caracal-pipeline/scabha2", + "scabha >= 0.6.1", # @ git+https://github.com/caracal-pipeline/scabha2", "ruamel.yaml", "munch", "omegaconf>=2.1pre1", diff --git a/stimela/commands/run.py b/stimela/commands/run.py index 3539ac2d..7a956883 100644 --- a/stimela/commands/run.py +++ b/stimela/commands/run.py @@ -291,7 +291,8 @@ def run(what: str, parameters: List[str] = [], dry_run: bool = False, if outputs and step.log.isEnabledFor(logging.DEBUG): outer_step.log.debug("run successful, outputs follow:") for name, value in outputs.items(): - outer_step.log.debug(f" {name}: {value}") + if name in recipe.outputs: + outer_step.log.debug(f" {name}: {value}") else: outer_step.log.info("run successful") diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 790ed1c9..12fe7ee5 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -1,4 +1,5 @@ -import glob, time +import time +import copy from multiprocessing import cpu_count import os, os.path, re, logging from typing import Any, Tuple, List, Dict, Optional, Union @@ -8,6 +9,7 @@ from omegaconf import MISSING, OmegaConf, DictConfig, ListConfig from collections import OrderedDict from collections.abc import Mapping +from pytest import param from scabha import cargo from pathos.pools import ProcessPool from pathos.serial import SerialPool @@ -28,7 +30,7 @@ Conditional = Optional[str] -from scabha.cargo import Cargo, Cab, Batch +from scabha.cargo import Parameter, Cargo, Cab, Batch from scabha.types import File, Directory, MS @@ -290,7 +292,7 @@ def run(self, params=None, subst=None, batch=None): else: raise StepValidationError(f"invalid outputs: {join_quote(invalid)}", log=self.log) - return {name: value for name, value in self.cargo.params.items()} + return self.cargo.params @dataclass class ForLoopClause(object): @@ -500,6 +502,14 @@ def add(self, cabname: str, label: str = None, """ return self.add_step(Step(cab=cabname, params=params, info=info), label=label) + @dataclass + class AliasInfo(object): + label: str # step label + step: Step # step + param: str # parameter name + io: Dict[str, Parameter] # points to self.inputs or self.outputs + from_recipe: bool = False # if True, value propagates from recipe up to step + from_step: bool = False # if True, value propagates from step down to recipe def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): if type(alias_target) is str: @@ -510,51 +520,50 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): if step is None: raise RecipeValidationError(f"alias '{alias_name}' refers to unknown step '{step_label}'", log=self.log) + # is the alias already defined + existing_alias = self._alias_list.get(alias_name, [None])[0] # find it in inputs or outputs input_schema = step.inputs.get(step_param_name) output_schema = step.outputs.get(step_param_name) schema = input_schema or output_schema if schema is None: raise RecipeValidationError(f"alias '{alias_name}' refers to unknown step parameter '{step_label}.{step_param_name}'", log=self.log) - # check that it's not an input that is already set - if step_param_name in step.params and input_schema: - raise RecipeValidationError(f"alias '{alias_name}' refers to input '{step_label}.{step_param_name}' that is already predefined", log=self.log) - # check that its I/O is consistent - if (input_schema and alias_name in self.outputs) or (output_schema and alias_name in self.inputs): - raise RecipeValidationError(f"alias '{alias_name}' can't refer to both an input and an output", log=self.log) - # see if it's already defined consistently - io = self.inputs if input_schema else self.outputs - existing_schema = io.get(alias_name) - if existing_schema is None: - io[alias_name] = schema.copy() # make copy of schema object - existing_schema = io[alias_name] # get reference to this copy now - existing_schema.required = False # will be set to True below as needed - existing_schema.implicit = None - else: - # if existing type was unset, set it quietly - if not existing_schema.dtype: - existing_schema.dtype = schema.dtype - # check if definition conflicts - elif schema.dtype != existing_schema.dtype: + # implicit inuts cannot be aliased + if input_schema and input_schema.implicit: + raise RecipeValidationError(f"alias '{alias_name}' refers to implicit input '{step_label}.{step_param_name}'", log=self.log) + # if alias is already defined, check for conflicts + if existing_alias is not None: + io = existing_alias.io + if io is self.outputs: + raise RecipeValidationError(f"output alias '{alias_name}' is defined more than once", log=self.log) + elif output_schema: + raise RecipeValidationError(f"alias '{alias_name}' refers to both an input and an output", log=self.log) + alias_schema = io[alias_name] + # now we know it's a multiply-defined input, check for type consistency + if alias_schema.dtype != schema.dtype: raise RecipeValidationError(f"alias '{alias_name}': dtype {schema.dtype} of '{step_label}.{step_param_name}' doesn't match previous dtype {existing_schema.dtype}", log=self.log) + + # else alias not yet defined, insert a schema + else: + io = self.inputs if input_schema else self.outputs + io[alias_name] = copy.copy(schema) + alias_schema = io[alias_name] # make copy of schema object + + # if step parameter is implicit, mark the alias as implicit. Note that this only applies to outputs + if schema.implicit: + alias_schema.implicit = Unresolved(f"{step_label}.{step_param_name}") # will be resolved when propagated from step + self._implicit_params.add(alias_name) # this is True if the step's parameter is defined in any way (set, default, or implicit) have_step_param = step_param_name in step.params or step_param_name in step.cargo.defaults or \ schema.default is not None or schema.implicit is not None - # alias becomes required if any step parameter it refers to was required and unset + # alias becomes required if any step parameter it refers to was required, but wasn't already set if schema.required and not have_step_param: - existing_schema.required = True - - ## if our alias doesn't have its own default set, and the step has something set, then we'll propagate the value - ## *from* the step to the recipe (for the first such step) - if have_step_param and alias_name not in self.defaults: - if alias_name not in self._alias_propagated_from_step: - self._alias_propagated_from_step[alias_name] = (step, step_param_name) - # all other steps will have the aliased value propagated *to* them + alias_schema.required = True self._alias_map[step_label, step_param_name] = alias_name - self._alias_list.setdefault(alias_name, []).append((step, step_param_name)) + self._alias_list.setdefault(alias_name, []).append(Recipe.AliasInfo(step_label, step, step_param_name, io)) def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): if not self.finalized: @@ -593,7 +602,6 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): # collect aliases self._alias_map = OrderedDict() self._alias_list = OrderedDict() - self._alias_propagated_from_step = OrderedDict() # collect from inputs and outputs for io in self.inputs, self.outputs: @@ -601,7 +609,6 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): if schema.aliases: if schema.dtype != "str" or schema.choices or schema.writable: raise RecipeValidationError(f"alias '{name}' should not specify type, choices or writability", log=log) - schema.dtype = "" # tells _add_alias to not check for alias_target in schema.aliases: self._add_alias(name, alias_target) @@ -675,43 +682,81 @@ def prevalidate(self, params: Optional[Dict[str, Any]], subst: Optional[Substitu if self.for_loop is not None and self.for_loop.var in self.inputs: params[self.for_loop.var] = Unresolved("for-loop") - # validate our own parameters - try: - Cargo.prevalidate(self, params, subst=subst) - except ScabhaBaseException as exc: - msg = f"recipe pre-validation failed: {exc}" - errors.append(RecipeValidationError(msg, log=self.log)) + # prevalidate our own parameters. This substitutes in defaults and does {}-substitutions + # we call this twice, potentially, so define as a function + def prevalidate_self(): + try: + Cargo.prevalidate(self, params, subst=subst) + except ScabhaBaseException as exc: + msg = f"recipe pre-validation failed: {exc}" + errors.append(RecipeValidationError(msg, log=self.log)) + + # merge again, since values may have changed + subst.recipe._merge_(self.params) + + prevalidate_self() - # merge again - subst.recipe._merge_(self.params) + # propagate alias values up to substeps, except for implicit values (these only ever propagate down to us) + for name, aliases in self._alias_list.items(): + if name in self.params and name not in self._implicit_params: + for alias in aliases: + alias.from_recipe = True + alias.step.update_parameter(alias.param, self.params[name]) - # propagate aliases up to/down from substeps - for name, value in self.params.items(): - self._propagate_parameter(name, value) + # prevalidate step parameters + # we call this twice, potentially, so define as a function + def prevalidate_steps(): + for label, step in self.steps.items(): + self._prep_step(label, step, subst) + + try: + step.prevalidate(subst) + except ScabhaBaseException as exc: + if type(exc) is SubstitutionErrorList: + self.log.error(f"unresolved {{}}-substitution(s):") + for err in exc.errors: + self.log.error(f" {err}") + msg = f"step '{label}' failed pre-validation: {exc}" + errors.append(RecipeValidationError(msg, log=self.log)) + + subst.previous = subst.current + subst.steps[label] = subst.previous + + prevalidate_steps() + + # now check for aliases that need to be propagated up/down + if not errors: + revalidate_self = revalidate_steps = False + for name, aliases in self._alias_list.items(): + # propagate up if alias is not set, or it is implicit=True (meaning it gets set from an implicit substep parameter) + if name not in self.params or type(self.inputs_outputs[name].implicit) is Unresolved: + from_step = False + for alias in aliases: + # if alias is set in step but not with us, mark it as propagating down + if alias.param in alias.step.params: + alias.from_step = from_step = revalidate_self = True + self.params[name] = alias.step.params[alias.param] + # and break out, we do this for the first matching step only + break + # if we propagated an input value down from a step, check if we need to propagate it up to any other steps + # note that this only ever applies to inputs + if from_step: + for alias in aliases: + if not alias.from_step: + alias.from_recipe = revalidate_steps = True + alias.step.update_parameter(alias.param, self.params[name]) + + # do we or any steps need to be revalidated? + if revalidate_self: + prevalidate_self() + if revalidate_steps: + prevalidate_steps() # check for missing parameters if self.missing_params: msg = f"""recipe '{self.name}' is missing the following required parameters: {join_quote(self.missing_params)}""" errors.append(RecipeValidationError(msg, log=self.log)) - # prevalidate step parameters - for label, step in self.steps.items(): - self._prep_step(label, step, subst) - - try: - step.prevalidate(subst) - except ScabhaBaseException as exc: - if type(exc) is SubstitutionErrorList: - self.log.error(f"unresolved {{}}-substitution(s):") - for err in exc.errors: - self.log.error(f" {err}") - msg = f"step '{label}' failed pre-validation: {exc}" - errors.append(RecipeValidationError(msg, log=self.log)) - - subst.previous = subst.current - subst.steps[label] = subst.previous - - if errors: raise RecipeValidationError(f"{len(errors)} error(s) validating the recipe '{self.name}'", log=self.log) @@ -742,15 +787,7 @@ def validate_inputs(self, params: Dict[str, Any], subst: Optional[SubstitutionNS subst._add_('config', self.config, nosubst=True) subst._add_('recipe', self.make_substitition_namespace(ns=self.assign)) - # subst._add_('recipe', self.make_substitition_namespace(ns=self.assign)) - # subst.recipe._merge_(params) - for name, (from_step, from_param) in self._alias_propagated_from_step.items(): - if name not in params: - params[name] = from_step.cargo.params[from_param] - - params = Cargo.validate_inputs(self, params, subst=subst, loosely=loosely) - - return params + return Cargo.validate_inputs(self, params, subst=subst, loosely=loosely) def _link_steps(self): """ @@ -774,32 +811,6 @@ def _link_steps(self): step.next_step = None step.previous_step = steps[i-2] - def _propagate_parameter(self, name, value): - # check if aliased parameter is to be propagated down from a step - from_step, from_step_param_name = self._alias_propagated_from_step.get(name, (None, None)) - if from_step is not None: - if from_step_param_name in from_step.cargo.params: - self.params[name] = step.cargo.params[from_step_param_name] - - # propagate up to steps - for step, step_param_name in self._alias_list.get(name, []): - if step is not from_step: - step.update_parameter(step_param_name, value) - - def update_parameter(self, name: str, value: Any): - """[summary] - - Parameters - ---------- - name : str - [description] - value : Any - [description] - """ - self.params[name] = value - # resolved values propagate up to substeps if aliases, and propagate back if implicit - self._propagate_parameter(name, value) - def summary(self, recursive=True): """Returns list of lines with a summary of the recipe state """ @@ -871,8 +882,9 @@ def _run(self) -> Dict[str, Any]: if type(value) is validate.Unresolved: raise RecipeValidationError(f"recipe '{self.name}' has unresolved input '{name}'", log=self.log) # propagate up all aliases - for step, step_param_name in self._alias_list.get(name, []): - step.update_parameter(step_param_name, value) + for alias in self._alias_list.get(name, []): + if alias.from_recipe: + alias.step.update_parameter(alias.param, value) else: if schema.required: raise RecipeValidationError(f"recipe '{self.name}' is missing required input '{name}'", log=self.log) @@ -927,14 +939,6 @@ def loop_worker(inst, step, label, subst, count, iter_var): subst.previous = step_params subst.steps[label] = subst.previous - # check aliases, our outputs need to be retrieved from the step - for name, _ in inst.outputs.items(): - for step1, step_param_name in inst._alias_list.get(name, []): - if step1 is step and step_param_name in step_params: - inst.params[name] = step_params[step_param_name] - # # clear implicit setting - # inst.outputs[name].implicit = None - loop_futures = [] for count, iter_var in enumerate(self._for_loop_values): @@ -956,8 +960,14 @@ def loop_worker(inst, step, label, subst, count, iter_var): # results = list(loop_pool.imap(loop_worker, *loop_args)) results = [loop_worker(*args) for args in loop_futures] + # now check for output aliases that need to be propagated down + for name, aliases in self._alias_list.items(): + for alias in aliases: + if alias.from_step: + self.params[name] = alias.step.params[alias.param] + self.log.info(f"recipe '{self.name}' executed successfully") - return {name: value for name, value in self.params.items() if name in self.outputs} + return OrderedDict((name, value) for name, value in self.params.items() if name in self.outputs) def run(self, **params) -> Dict[str, Any]: diff --git a/stimela/tests/test_recipe.py b/stimela/tests/test_recipe.py index 9af5b966..ccbf67fa 100644 --- a/stimela/tests/test_recipe.py +++ b/stimela/tests/test_recipe.py @@ -1,5 +1,41 @@ import stimela -import os +import os, re, subprocess + +def run(command): + """Runs command, returns tuple of exit code, output""" + try: + return 0, subprocess.check_output(command, shell=True).strip().decode() + except subprocess.CalledProcessError as exc: + return exc.returncode, exc.output.strip().decode() + +def verify_output(output, *regexes): + """Returns true if the output contains lines matching the regexes in sequence (possibly with other lines in between)""" + regexes = list(regexes[::-1]) + for line in output.split("\n"): + if regexes and re.search(regexes[-1], line): + regexes.pop() + if regexes: + print("Error, the following regexes did not match the output:") + for regex in regexes: + print(f" {regex}") + return False + return True + + +def test_test_aliasing(): + print("===== expecting an error since required parameters are missing =====") + retcode, _ = run("stimela -v exec test_aliasing.yml") + assert retcode != 0 + + print("===== expecting no errors now =====") + retcode, output = run("stimela -v exec test_aliasing.yml a=1 s3_a=1 s4_a=1") + assert retcode == 0 + print(output) + assert verify_output(output, + "DEBUG: ### validated outputs", + "DEBUG: recipe 'recipe'", + "DEBUG: out: 1") + def test_test_recipe(): print("===== expecting an error since 'msname' parameter is missing =====") @@ -27,8 +63,6 @@ def test_test_loop_recipe(): retcode = os.system("stimela -v exec test_loop_recipe.yml loop_recipe") assert retcode == 0 - - def test_runtime_recipe(): ## OMS ## disabling for now, need to revise to use "dummy" cabs (or add real cabs?) From 9666b519086807f986bac75f13218de1ee42be9a Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Sat, 5 Mar 2022 20:41:40 +0200 Subject: [PATCH 06/37] added test --- stimela/tests/test_aliasing.yml | 71 +++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 stimela/tests/test_aliasing.yml diff --git a/stimela/tests/test_aliasing.yml b/stimela/tests/test_aliasing.yml new file mode 100644 index 00000000..5d000bbf --- /dev/null +++ b/stimela/tests/test_aliasing.yml @@ -0,0 +1,71 @@ +cabs: + echo: + image: null + command: echo + policies: + skip_implicits: false + key_value: true + # simms + inputs: + a: + dtype: str + required: true + b: + dtype: str + c: + dtype: str + d: + implicit: "d_is_implicit" + outputs: + out: + dtype: File + implicit: "{current.a}" + must_exist: false + out2: + dtype: File + must_exist: false + default: out2 + + +recipe: + name: "alias test recipe" + aliases: + a: [s1.a, s2.a] + b: [s1.b, s2.b] + out: [s4.out] + out2: [s3.out2] + + steps: + s1: + cab: echo + params: + b: 1 + c: 2 + s2: + cab: echo + params: + c: 2 + s3: + cab: echo + params: + c: 2 + s4: + cab: echo + params: + c: 4 + s5: + recipe: + name: "alias sub-recipe" + aliases: + a: [ss1.a, ss2.a] + steps: + ss1: + cab: echo + params: + c: 2 + ss2: + cab: echo + params: + c: 2 + params: + a: '{info.fqname}' From a6cf0674fc7430d2370be9d1824bc6eb17370f8b Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Sat, 5 Mar 2022 20:44:52 +0200 Subject: [PATCH 07/37] more fixes to alias propagation --- stimela/kitchen/recipe.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 12fe7ee5..75d4551b 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -375,8 +375,8 @@ def __post_init__ (self): for io, io_label in [(self.inputs, "inputs"), (self.outputs, "outputs")]: if self.for_loop.var in io: raise RecipeValidationError(f"'for_loop.var={self.for_loop.var}' clashes with recipe {io_label}") - # map of aliases - self._alias_map = None + # marked when finalized + self._alias_map = None # set of keys protected from assignment self._protected_from_assign = set() @@ -547,7 +547,7 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): else: io = self.inputs if input_schema else self.outputs io[alias_name] = copy.copy(schema) - alias_schema = io[alias_name] # make copy of schema object + alias_schema = io[alias_name] # make copy of schema object # if step parameter is implicit, mark the alias as implicit. Note that this only applies to outputs if schema.implicit: @@ -558,6 +558,10 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): have_step_param = step_param_name in step.params or step_param_name in step.cargo.defaults or \ schema.default is not None or schema.implicit is not None + # if the step parameter is set, mark our schema as having a default + if have_step_param: + alias_schema.default = Unresolved(f"{step_label}.{step_param_name}") + # alias becomes required if any step parameter it refers to was required, but wasn't already set if schema.required and not have_step_param: alias_schema.required = True @@ -620,7 +624,7 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): # automatically make aliases for step parameters that are unset, and don't have a default, and aren't implict for label, step in self.steps.items(): for name, schema in step.inputs_outputs.items(): - if (label, name) not in self._alias_map and name not in step.params \ + if (label, name) not in self._alias_map and name not in step.params and name not in step.cargo.params \ and name not in step.cargo.defaults and schema.default is None \ and not schema.implicit: auto_name = f"{label}_{name}" @@ -668,6 +672,8 @@ def prevalidate(self, params: Optional[Dict[str, Any]], subst: Optional[Substitu # update assignments self.update_assignments(self.assign, self.assign_based_on, params=params, location=self.fqname) + subst_outer = subst # outer dictionary is used to prevalidate our parameters + subst = SubstitutionNS() info = SubstitutionNS(fqname=self.fqname) # mutable=False means these sub-namespaces are not subject to {}-substitutions @@ -686,7 +692,7 @@ def prevalidate(self, params: Optional[Dict[str, Any]], subst: Optional[Substitu # we call this twice, potentially, so define as a function def prevalidate_self(): try: - Cargo.prevalidate(self, params, subst=subst) + Cargo.prevalidate(self, params, subst=subst_outer) except ScabhaBaseException as exc: msg = f"recipe pre-validation failed: {exc}" errors.append(RecipeValidationError(msg, log=self.log)) @@ -695,6 +701,7 @@ def prevalidate_self(): subst.recipe._merge_(self.params) prevalidate_self() + params = self.params # propagate alias values up to substeps, except for implicit values (these only ever propagate down to us) for name, aliases in self._alias_list.items(): From 409be99771998712288b0f92988df3dd08dd922e Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Sat, 5 Mar 2022 21:07:11 +0200 Subject: [PATCH 08/37] more fixes to alias propagation --- stimela/kitchen/recipe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 75d4551b..0cff663d 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -726,6 +726,7 @@ def prevalidate_steps(): msg = f"step '{label}' failed pre-validation: {exc}" errors.append(RecipeValidationError(msg, log=self.log)) + subst.current._merge_(step.cargo.params) # these may hav echanged in prevalidation subst.previous = subst.current subst.steps[label] = subst.previous @@ -740,9 +741,9 @@ def prevalidate_steps(): from_step = False for alias in aliases: # if alias is set in step but not with us, mark it as propagating down - if alias.param in alias.step.params: + if alias.param in alias.step.cargo.params: alias.from_step = from_step = revalidate_self = True - self.params[name] = alias.step.params[alias.param] + self.params[name] = alias.step.cargo.params[alias.param] # and break out, we do this for the first matching step only break # if we propagated an input value down from a step, check if we need to propagate it up to any other steps From 512c5efb1231cf6baea033b7c9551425e92ca356 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Thu, 10 Mar 2022 19:38:38 +0200 Subject: [PATCH 09/37] finished cleanup of aliasing logic. Removed self.params from Cargo --- stimela/backends/native.py | 20 +-- stimela/commands/run.py | 4 +- stimela/kitchen/recipe.py | 233 ++++++++++++++++------------- stimela/kitchen/runners.py | 6 +- stimela/tests/test_aliasing.yml | 5 + stimela/tests/test_loop_recipe.yml | 3 +- stimela/tests/test_recipe.py | 2 +- stimela/tests/test_recipe.yml | 7 + 8 files changed, 158 insertions(+), 122 deletions(-) diff --git a/stimela/backends/native.py b/stimela/backends/native.py index 1e4618f2..1118f41f 100644 --- a/stimela/backends/native.py +++ b/stimela/backends/native.py @@ -5,7 +5,7 @@ from collections import OrderedDict from contextlib import redirect_stderr, redirect_stdout -from scabha.cargo import Cab +from scabha.cargo import Cab, Parameter from stimela import logger from stimela.utils.xrun_poll import xrun, dispatch_to_log from stimela.exceptions import StimelaCabRuntimeError @@ -30,7 +30,7 @@ def write(self, s): return len(s) -def run(cab: Cab, log, subst: Optional[Dict[str, Any]] = None, batch=None): +def run(cab: Cab, params: Dict[str, Any], log, subst: Optional[Dict[str, Any]] = None, batch=None): """Runs cab contents Args: @@ -45,13 +45,13 @@ def run(cab: Cab, log, subst: Optional[Dict[str, Any]] = None, batch=None): # commands of form "(module)function" are a Python call match = re.match("^\((.+)\)(.+)$", cab.command) if match: - return run_callable(match.group(1), match.group(2), cab, log, subst) + return run_callable(match.group(1), match.group(2), cab, params, log, subst) # everything else is a shell command else: - return run_command(cab, log, subst) + return run_command(cab, params, log, subst) -def run_callable(modulename: str, funcname: str, cab: Cab, log, subst: Optional[Dict[str, Any]] = None): +def run_callable(modulename: str, funcname: str, cab: Cab, log, params: Dict[str, Any], subst: Optional[Dict[str, Any]] = None): """Runs a cab corresponding to a Python callable. Intercepts stdout/stderr into the logger. Args: @@ -93,8 +93,8 @@ def run_callable(modulename: str, funcname: str, cab: Cab, log, subst: Optional[ args = OrderedDict() for key, schema in cab.inputs_outputs.items(): if not schema.policies.skip: - if key in cab.params: - args[key] = cab.params[key] + if key in params: + args[key] = params[key] elif cab.get_schema_policy(schema, 'pass_missing_as_none'): args[key] = None @@ -118,7 +118,7 @@ def run_callable(modulename: str, funcname: str, cab: Cab, log, subst: Optional[ return retval -def run_command(cab: Cab, log, subst: Optional[Dict[str, Any]] = None, batch=None): +def run_command(cab: Cab, params: Dict[str, Any], log, subst: Optional[Dict[str, Any]] = None, batch=None): """Runns command represented by cab. Args: @@ -133,11 +133,11 @@ def run_command(cab: Cab, log, subst: Optional[Dict[str, Any]] = None, batch=Non int: return value (e.g. exit code) of command """ # build command line from parameters - args, venv = cab.build_command_line(subst) + args, venv = cab.build_command_line(params, subst) if batch: batch = SlurmBatch(**batch) - batch.__init_cab__(cab, subst, log) + batch.__init_cab__(cab, params, subst, log) runcmd = "/bin/bash -c" + " ".join(args) jobfile = "foo-bar.job" batch.name = "foo-bar" diff --git a/stimela/commands/run.py b/stimela/commands/run.py index 7a956883..a115d794 100644 --- a/stimela/commands/run.py +++ b/stimela/commands/run.py @@ -180,7 +180,7 @@ def run(what: str, parameters: List[str] = [], dry_run: bool = False, log.info("pre-validating the recipe") outer_step = Step(recipe=recipe, name=f"{recipe_name}", info=what, params=params) try: - outer_step.prevalidate() + params = outer_step.prevalidate() except ScabhaBaseException as exc: if not exc.logged: log.error(f"pre-validation failed: {exc}") @@ -273,7 +273,7 @@ def run(what: str, parameters: List[str] = [], dry_run: bool = False, # in debug mode, pretty-print the recipe if log.isEnabledFor(logging.DEBUG): log.debug("---------- prevalidated step follows ----------") - for line in outer_step.summary(): + for line in outer_step.summary(params=params): log.debug(line) if dry_run: diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 0cff663d..d319ab9e 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -5,7 +5,6 @@ from typing import Any, Tuple, List, Dict, Optional, Union from enum import Enum from dataclasses import dataclass - from omegaconf import MISSING, OmegaConf, DictConfig, ListConfig from collections import OrderedDict from collections.abc import Mapping @@ -14,28 +13,25 @@ from pathos.pools import ProcessPool from pathos.serial import SerialPool from multiprocessing import cpu_count - -from scabha.exceptions import SubstitutionError, SubstitutionErrorList from stimela.config import EmptyDictDefault, EmptyListDefault, StimelaLogConfig import stimela from stimela import logger, stimelogging - from stimela.exceptions import * - from scabha import validate +import scabha.exceptions +from scabha.exceptions import SubstitutionError, SubstitutionErrorList from scabha.validate import Unresolved, join_quote from scabha.substitutions import SubstitutionNS, substitutions_from - -from . import runners - -Conditional = Optional[str] - from scabha.cargo import Parameter, Cargo, Cab, Batch from scabha.types import File, Directory, MS +from . import runners +Conditional = Optional[str] - +class DeferredAlias(Unresolved): + """Class used as placeholder for deferred alias lookup (i.e. before an aliased value is available)""" + pass @dataclass class Step: @@ -64,34 +60,32 @@ def __post_init__(self): if bool(self.cab) == bool(self.recipe): raise StepValidationError("step must specify either a cab or a nested recipe, but not both") self.cargo = self.config = None - self._prevalidated = None self.tags = set(self.tags) # convert params into stadard dict, else lousy stuff happens when we insert non-standard objects if isinstance(self.params, DictConfig): self.params = OmegaConf.to_container(self.params) + # after (pre)validation, this contains parameter values + self.validated_params = None - def summary(self, recursive=True): - return self.cargo and self.cargo.summary(recursive=recursive) + def summary(self, params=None, recursive=True): + return self.cargo and self.cargo.summary(recursive=recursive, params=params or self.validated_params or self.params) @property def finalized(self): return self.cargo is not None - @property - def prevalidated(self): - return self._prevalidated - @property def missing_params(self): - return self.cargo.missing_params + return OrderedDict([(name, schema) for name, schema in self.cargo.inputs_outputs.items() + if schema.required and name not in self.validated_params]) @property def invalid_params(self): - return self.cargo.invalid_params + return [name for name, value in self.validated_params.items() if isinstance(value, scabha.exceptions.Error)] @property def unresolved_params(self): - return self.cargo.unresolved_params + return [name for name, value in self.validated_params.items() if isinstance(value, Unresolved)] @property def inputs(self): @@ -122,10 +116,6 @@ def nesting(self): def update_parameter(self, name, value): self.params[name] = value - # only pass value up to cargo if has already been validated. This avoids redefinition errors from nested aliases. - # otherwise, just keep the value in our dict (cargo will get it upon validation) - if self.cargo is not None and self.prevalidated: - self.cargo.update_parameter(name, value) def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): if not self.finalized: @@ -170,16 +160,15 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): def prevalidate(self, subst: Optional[SubstitutionNS]=None): - if not self.prevalidated: - self.finalize() - # validate cab or recipe - self.cargo.prevalidate(self.params, subst) - self.log.debug(f"{self.cargo.name}: {len(self.missing_params)} missing, " - f"{len(self.invalid_params)} invalid and " - f"{len(self.unresolved_params)} unresolved parameters") - if self.invalid_params: - raise StepValidationError(f"{self.cargo.name} has the following invalid parameters: {join_quote(self.invalid_params)}") - self._prevalidated = True + self.finalize() + # validate cab or recipe + params = self.validated_params = self.cargo.prevalidate(self.params, subst) + self.log.debug(f"{self.cargo.name}: {len(self.missing_params)} missing, " + f"{len(self.invalid_params)} invalid and " + f"{len(self.unresolved_params)} unresolved parameters") + if self.invalid_params: + raise StepValidationError(f"{self.cargo.name} has the following invalid parameters: {join_quote(self.invalid_params)}") + return params def log_summary(self, level, title, color=None): extra = dict(color=color, boldface=True) @@ -189,11 +178,22 @@ def log_summary(self, level, title, color=None): for line in self.summary(recursive=False): self.log.log(level, line, extra=extra) - def run(self, params=None, subst=None, batch=None): + def run(self, subst=None, batch=None): """Runs the step""" - self.prevalidate() - if params is None: - params = self.params + if self.validated_params is None: + self.prevalidate(self.params) + + # Since prevalidation will have populated default values for potentially missing parameters, use those values + # For parameters that aren't missing, use whatever value that was suplied + params = self.validated_params.copy() + params.update(**self.params) + + # # However the unresolved ones should be + # params = self.validated_params + # for name, value in params.items(): + # if type(value) is Unresolved: + # params[name] = self.params[name] + # # params = self.params skip_warned = False # becomes True when warnings are given @@ -221,6 +221,8 @@ def run(self, params=None, subst=None, batch=None): else: raise + self.validated_params.update(**params) + # log inputs if validated and not self.skip: self.log_summary(logging.INFO, "validated inputs", color="GREEN") @@ -228,8 +230,8 @@ def run(self, params=None, subst=None, batch=None): subst.current = params # bomb out if some inputs failed to validate or substitutions resolve - if self.cargo.invalid_params or self.cargo.unresolved_params: - invalid = self.cargo.invalid_params + self.cargo.unresolved_params + if self.invalid_params or self.unresolved_params: + invalid = self.invalid_params + self.unresolved_params if self.skip: self.log.warning(f"invalid inputs: {join_quote(invalid)}") if not skip_warned: @@ -242,10 +244,10 @@ def run(self, params=None, subst=None, batch=None): try: if type(self.cargo) is Recipe: self.cargo.backend = self.cargo.backend or self.backend - self.cargo._run() + self.cargo._run(params) elif type(self.cargo) is Cab: self.cargo.backend = self.cargo.backend or self.backend - runners.run_cab(self.cargo, log=self.log, subst=subst, batch=batch) + runners.run_cab(self.cargo, params, log=self.log, subst=subst, batch=batch) else: raise RuntimeError("Unknown cargo type") except ScabhaBaseException as exc: @@ -277,14 +279,14 @@ def run(self, params=None, subst=None, batch=None): self.log_summary(level, "failed outputs", color="WARNING") raise - if subst is not None: - subst.current._merge_(params) - if validated: + self.validated_params.update(**params) + if subst is not None: + subst.current._merge_(params) self.log_summary(logging.DEBUG, "validated outputs") # bomb out if an output was invalid - invalid = [name for name in self.cargo.invalid_params + self.cargo.unresolved_params if name in self.cargo.outputs] + invalid = [name for name in self.invalid_params + self.unresolved_params if name in self.cargo.outputs] if invalid: if self.skip: self.log.warning(f"invalid outputs: {join_quote(invalid)}") @@ -292,7 +294,7 @@ def run(self, params=None, subst=None, batch=None): else: raise StepValidationError(f"invalid outputs: {join_quote(invalid)}", log=self.log) - return self.cargo.params + return params @dataclass class ForLoopClause(object): @@ -312,7 +314,6 @@ class Recipe(Cargo): Additional attributes available after validation with arguments are as per for a Cab: self.input_output: combined parameter dict (self.input + self.output), maps name to Parameter - self.missing_params: dict (name to Parameter) of required parameters that have not been specified Raises: various classes of validation errors @@ -379,6 +380,7 @@ def __post_init__ (self): self._alias_map = None # set of keys protected from assignment self._protected_from_assign = set() + self._for_loop_values = None def protect_from_assignments(self, keys): self._protected_from_assign.update(keys) @@ -401,9 +403,7 @@ def validate_assignments(self, assign, assign_based_on, location): # if key in io: # raise RecipeValidationError(f"'{location}.{assign_label}.{key}' clashes with an {io_label}") - def update_assignments(self, assign, assign_based_on, params=None, location=""): - if params is None: - params = self.params + def update_assignments(self, assign, assign_based_on, params, location=""): for basevar, value_list in assign_based_on.items(): # make sure the base variable is defined if basevar in assign: @@ -560,7 +560,7 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): # if the step parameter is set, mark our schema as having a default if have_step_param: - alias_schema.default = Unresolved(f"{step_label}.{step_param_name}") + alias_schema.default = DeferredAlias(f"{step_label}.{step_param_name}") # alias becomes required if any step parameter it refers to was required, but wasn't already set if schema.required and not have_step_param: @@ -624,7 +624,7 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): # automatically make aliases for step parameters that are unset, and don't have a default, and aren't implict for label, step in self.steps.items(): for name, schema in step.inputs_outputs.items(): - if (label, name) not in self._alias_map and name not in step.params and name not in step.cargo.params \ + if (label, name) not in self._alias_map and name not in step.params \ and name not in step.cargo.defaults and schema.default is None \ and not schema.implicit: auto_name = f"{label}_{name}" @@ -650,9 +650,9 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): else: raise RecipeValidationError(f"for_loop: over is of invalid type {type(self.for_loop.over)}", log=log) - # insert empty loop variable - if self.for_loop.var not in self.assign: - self.assign[self.for_loop.var] = "" + # # insert empty loop variable + # if self.for_loop.var not in self.assign: + # self.assign[self.for_loop.var] = "" def _prep_step(self, label, step, subst): parts = label.split("-") @@ -681,7 +681,7 @@ def prevalidate(self, params: Optional[Dict[str, Any]], subst: Optional[Substitu subst._add_('config', self.config, nosubst=True) subst._add_('steps', {}, nosubst=True) subst._add_('previous', {}, nosubst=True) - subst._add_('recipe', self.make_substitition_namespace(ns=self.assign)) + subst._add_('recipe', self.make_substitition_namespace(params=params, ns=self.assign)) subst.recipe._merge_(params) # add for-loop variable to inputs, if expected there @@ -690,34 +690,39 @@ def prevalidate(self, params: Optional[Dict[str, Any]], subst: Optional[Substitu # prevalidate our own parameters. This substitutes in defaults and does {}-substitutions # we call this twice, potentially, so define as a function - def prevalidate_self(): + def prevalidate_self(params): try: - Cargo.prevalidate(self, params, subst=subst_outer) + params = Cargo.prevalidate(self, params, subst=subst_outer) + # validate for-loop, if needed + self.validate_for_loop(params, strict=False) + except ScabhaBaseException as exc: msg = f"recipe pre-validation failed: {exc}" errors.append(RecipeValidationError(msg, log=self.log)) # merge again, since values may have changed - subst.recipe._merge_(self.params) + subst.recipe._merge_(params) + return params - prevalidate_self() - params = self.params + params = prevalidate_self(params) # propagate alias values up to substeps, except for implicit values (these only ever propagate down to us) for name, aliases in self._alias_list.items(): - if name in self.params and name not in self._implicit_params: + if name in params and type(params[name]) is not DeferredAlias and name not in self._implicit_params: for alias in aliases: alias.from_recipe = True - alias.step.update_parameter(alias.param, self.params[name]) + alias.step.update_parameter(alias.param, params[name]) # prevalidate step parameters # we call this twice, potentially, so define as a function + def prevalidate_steps(): for label, step in self.steps.items(): self._prep_step(label, step, subst) try: - step.prevalidate(subst) + step_params = step.prevalidate(subst) + subst.current._merge_(step_params) # these may have changed in prevalidation except ScabhaBaseException as exc: if type(exc) is SubstitutionErrorList: self.log.error(f"unresolved {{}}-substitution(s):") @@ -726,7 +731,6 @@ def prevalidate_steps(): msg = f"step '{label}' failed pre-validation: {exc}" errors.append(RecipeValidationError(msg, log=self.log)) - subst.current._merge_(step.cargo.params) # these may hav echanged in prevalidation subst.previous = subst.current subst.steps[label] = subst.previous @@ -736,14 +740,14 @@ def prevalidate_steps(): if not errors: revalidate_self = revalidate_steps = False for name, aliases in self._alias_list.items(): - # propagate up if alias is not set, or it is implicit=True (meaning it gets set from an implicit substep parameter) - if name not in self.params or type(self.inputs_outputs[name].implicit) is Unresolved: + # propagate up if alias is not set, or it is implicit=Unresolved (meaning it gets set from an implicit substep parameter) + if name not in params or type(params[name]) is DeferredAlias or type(self.inputs_outputs[name].implicit) is Unresolved: from_step = False for alias in aliases: # if alias is set in step but not with us, mark it as propagating down - if alias.param in alias.step.cargo.params: + if alias.param in alias.step.validated_params: alias.from_step = from_step = revalidate_self = True - self.params[name] = alias.step.cargo.params[alias.param] + params[name] = alias.step.validated_params[alias.param] # and break out, we do this for the first matching step only break # if we propagated an input value down from a step, check if we need to propagate it up to any other steps @@ -752,17 +756,18 @@ def prevalidate_steps(): for alias in aliases: if not alias.from_step: alias.from_recipe = revalidate_steps = True - alias.step.update_parameter(alias.param, self.params[name]) + alias.step.update_parameter(alias.param, params[name]) # do we or any steps need to be revalidated? if revalidate_self: - prevalidate_self() + params = prevalidate_self(params) if revalidate_steps: prevalidate_steps() # check for missing parameters - if self.missing_params: - msg = f"""recipe '{self.name}' is missing the following required parameters: {join_quote(self.missing_params)}""" + missing_params = [name for name, schema in self.inputs_outputs.items() if schema.required and name not in params] + if missing_params: + msg = f"""recipe '{self.name}' is missing the following required parameters: {join_quote(missing_params)}""" errors.append(RecipeValidationError(msg, log=self.log)) if errors: @@ -770,30 +775,49 @@ def prevalidate_steps(): self.log.debug("recipe pre-validated") - def validate_inputs(self, params: Dict[str, Any], subst: Optional[SubstitutionNS]=None, loosely=False): + return params + + def validate_for_loop(self, params, strict=False): # in case of for loops, get list of values to be iterated over if self.for_loop is not None: - # if over != None (see finalize() above), list of values needs to be looked up in inputs + # if over != None (see finalize() above), list of values needs to be looked `up in inputs + # if it is None, then an explicit list was supplied and is already in self._for_loop_values. if self.for_loop.over is not None: - self._for_loop_values = self.params[self.for_loop.over] - if not isinstance(self._for_loop_values, (list, tuple)): - self._for_loop_values = [self._for_loop_values] - self.log.info(f"recipe is a for-loop with '{self.for_loop.var}' iterating over {len(self._for_loop_values)} values") - self.log.info(f"Loop values: {self._for_loop_values}") - # add first value to inputs, if needed - if self.for_loop.var in self.inputs and self._for_loop_values: + # check that it's legal + if self.for_loop.over in self.assign: + values = self.assign[self.for_loop.over] + elif self.for_loop.over in params: + values = params[self.for_loop.over] + elif self.for_loop.over not in self.inputs: + raise ParameterValidationError(f"for_loop.over={self.for_loop.over} does not refer to a known parameter") + else: + raise ParameterValidationError(f"for_loop.over={self.for_loop.over} is unset") + if strict and isinstance(values, Unresolved): + raise ParameterValidationError(f"for_loop.over={self.for_loop.over} is unresolved") + if not isinstance(values, (list, tuple)): + values = [values] + if self._for_loop_values is None: + self.log.info(f"recipe is a for-loop with '{self.for_loop.var}' iterating over {len(values)} values") + self.log.info(f"Loop values: {values}") + self._for_loop_values = values + if self.for_loop.var in self.inputs: params[self.for_loop.var] = self._for_loop_values[0] + else: + self.assign[self.for_loop.var] = self._for_loop_values[0] # else fake a single-value list else: self._for_loop_values = [None] - # add 'recipe' substitutions + def validate_inputs(self, params: Dict[str, Any], subst: Optional[SubstitutionNS]=None, loosely=False): + + self.validate_for_loop(params, strict=True) + if subst is None: subst = SubstitutionNS() info = SubstitutionNS(fqname=self.fqname) subst._add_('info', info, nosubst=True) subst._add_('config', self.config, nosubst=True) - subst._add_('recipe', self.make_substitition_namespace(ns=self.assign)) + subst._add_('recipe', self.make_substitition_namespace(params=params, ns=self.assign)) return Cargo.validate_inputs(self, params, subst=subst, loosely=loosely) @@ -819,11 +843,11 @@ def _link_steps(self): step.next_step = None step.previous_step = steps[i-2] - def summary(self, recursive=True): + def summary(self, params: Dict[str, Any], recursive=True): """Returns list of lines with a summary of the recipe state """ - lines = [f"recipe '{self.name}':"] + [f" {name} = {value}" for name, value in self.params.items()] + \ - [f" {name} = ???" for name in self.missing_params] + lines = [f"recipe '{self.name}':"] + [f" {name} = {value}" for name, value in params.items()] + \ + [f" {name} = ???" for name in self.inputs_outputs if name not in params] if recursive: lines.append(" steps:") for name, step in self.steps.items(): @@ -834,7 +858,7 @@ def summary(self, recursive=True): _root_recipe_ns = None - def _run(self) -> Dict[str, Any]: + def _run(self, params) -> Dict[str, Any]: """Internal recipe run method. Meant to be called from a wrapper Step object (which validates the parameters, etc.) Parameters @@ -857,7 +881,7 @@ def _run(self) -> Dict[str, Any]: subst._add_('info', info, nosubst=True) subst._add_('steps', {}, nosubst=True) subst._add_('previous', {}, nosubst=True) - recipe_ns = self.make_substitition_namespace(ns=self.assign) + recipe_ns = self.make_substitition_namespace(params=params, ns=self.assign) subst._add_('recipe', recipe_ns) # merge in config sections, except "recipe" which clashes with our namespace @@ -885,9 +909,9 @@ def _run(self) -> Dict[str, Any]: # our inputs have been validated, so propagate aliases to steps. Check for missing stuff just in case for name, schema in self.inputs.items(): - if name in self.params: - value = self.params[name] - if type(value) is validate.Unresolved: + if name in params: + value = params[name] + if isinstance(value, Unresolved): raise RecipeValidationError(f"recipe '{self.name}' has unresolved input '{name}'", log=self.log) # propagate up all aliases for alias in self._alias_list.get(name, []): @@ -972,21 +996,22 @@ def loop_worker(inst, step, label, subst, count, iter_var): for name, aliases in self._alias_list.items(): for alias in aliases: if alias.from_step: - self.params[name] = alias.step.params[alias.param] + if alias.param in alias.step.validated_params: + params[name] = alias.step.validated_params[alias.param] self.log.info(f"recipe '{self.name}' executed successfully") - return OrderedDict((name, value) for name, value in self.params.items() if name in self.outputs) + return OrderedDict((name, value) for name, value in params.items() if name in self.outputs) - def run(self, **params) -> Dict[str, Any]: - """Public interface for running a step. Keywords are passed in as step parameters + # def run(self, **params) -> Dict[str, Any]: + # """Public interface for running a step. Keywords are passed in as step parameters - Returns - ------- - Dict[str, Any] - Dictionary of formal outputs - """ - return Step(recipe=self, params=params, info=f"wrapper step for recipe '{self.name}'").run() + # Returns + # ------- + # Dict[str, Any] + # Dictionary of formal outputs + # """ + # return Step(recipe=self, params=params, info=f"wrapper step for recipe '{self.name}'").run() class PyRecipe(Recipe): diff --git a/stimela/kitchen/runners.py b/stimela/kitchen/runners.py index 20c09f0f..20de8285 100644 --- a/stimela/kitchen/runners.py +++ b/stimela/kitchen/runners.py @@ -1,13 +1,13 @@ import shlex from typing import Dict, Optional, Any -from scabha.cargo import Cab, Batch +from scabha.cargo import Cab, Batch, Parameter from stimela import logger from stimela.utils.xrun_poll import xrun from stimela.exceptions import StimelaCabRuntimeError -def run_cab(cab: Cab, log=None, subst: Optional[Dict[str, Any]] = None, batch: Batch=None): +def run_cab(cab: Cab, params: Dict[str, Any], log=None, subst: Optional[Dict[str, Any]] = None, batch: Batch=None): log = log or logger() backend = __import__(f"stimela.backends.{cab.backend.name}", fromlist=[cab.backend.name]) - return backend.run(cab, log=log, subst=subst, batch=batch) + return backend.run(cab, params=params, log=log, subst=subst, batch=batch) diff --git a/stimela/tests/test_aliasing.yml b/stimela/tests/test_aliasing.yml index 5d000bbf..8f129b58 100644 --- a/stimela/tests/test_aliasing.yml +++ b/stimela/tests/test_aliasing.yml @@ -26,6 +26,11 @@ cabs: must_exist: false default: out2 +opts: + log: + dir: test-logs/logs-{config.run.datetime} + nest: 3 + symlink: logs recipe: name: "alias test recipe" diff --git a/stimela/tests/test_loop_recipe.yml b/stimela/tests/test_loop_recipe.yml index c177d4ba..081928af 100644 --- a/stimela/tests/test_loop_recipe.yml +++ b/stimela/tests/test_loop_recipe.yml @@ -53,7 +53,7 @@ lib: opts: log: - dir: logs-{config.run.datetime} + dir: test-logs/logs-{config.run.datetime} nest: 3 symlink: logs @@ -72,7 +72,6 @@ cubical_image_loop: image-prefix: "{recipe.dir.out}/im{info.suffix}-{recipe.loop-name}/im{info.suffix}-{recipe.loop-name}" loop-name: "s{recipe.loop:02d}" log: - dir: logs-{config.run.datetime} name: log-{recipe.loop-name}-{info.fqname}.txt for_loop: diff --git a/stimela/tests/test_recipe.py b/stimela/tests/test_recipe.py index ccbf67fa..d9b3e58c 100644 --- a/stimela/tests/test_recipe.py +++ b/stimela/tests/test_recipe.py @@ -48,7 +48,7 @@ def test_test_recipe(): def test_test_loop_recipe(): print("===== expecting an error since 'ms' parameter is missing =====") - retcode = os.system("stimela -v exec test_loop_recipe.yml cubical_image") + retcode = os.system("stimela -v exec test_loop_recipe.yml cubical_image_loop") assert retcode != 0 print("===== expecting no errors now =====") diff --git a/stimela/tests/test_recipe.yml b/stimela/tests/test_recipe.yml index aa82b8ff..1076e40a 100644 --- a/stimela/tests/test_recipe.yml +++ b/stimela/tests/test_recipe.yml @@ -66,6 +66,13 @@ cabs: dtype: str required: true +opts: + log: + dir: test-logs/logs-{config.run.datetime} + nest: 3 + symlink: logs + + recipe: name: "demo recipe" info: 'top level recipe definition' From 8ce8950e2b6fa73c94509b10e0bd85fb023d17ea Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Thu, 10 Mar 2022 19:56:49 +0200 Subject: [PATCH 10/37] fixed run_callable --- stimela/backends/native.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stimela/backends/native.py b/stimela/backends/native.py index 1118f41f..fba72fdf 100644 --- a/stimela/backends/native.py +++ b/stimela/backends/native.py @@ -51,7 +51,7 @@ def run(cab: Cab, params: Dict[str, Any], log, subst: Optional[Dict[str, Any]] = return run_command(cab, params, log, subst) -def run_callable(modulename: str, funcname: str, cab: Cab, log, params: Dict[str, Any], subst: Optional[Dict[str, Any]] = None): +def run_callable(modulename: str, funcname: str, cab: Cab, params: Dict[str, Any], log, subst: Optional[Dict[str, Any]] = None): """Runs a cab corresponding to a Python callable. Intercepts stdout/stderr into the logger. Args: From 6c616452e9a802cb06c950f32e7415c31661f4f1 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 16 Mar 2022 13:13:09 +0200 Subject: [PATCH 11/37] do not summarize missing parameters when validated successfully --- stimela/kitchen/recipe.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index d319ab9e..a723314c 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -67,8 +67,9 @@ def __post_init__(self): # after (pre)validation, this contains parameter values self.validated_params = None - def summary(self, params=None, recursive=True): - return self.cargo and self.cargo.summary(recursive=recursive, params=params or self.validated_params or self.params) + def summary(self, params=None, recursive=True, ignore_missing=False): + return self.cargo and self.cargo.summary(recursive=recursive, + params=params or self.validated_params or self.params, ignore_missing=ignore_missing) @property def finalized(self): @@ -170,12 +171,12 @@ def prevalidate(self, subst: Optional[SubstitutionNS]=None): raise StepValidationError(f"{self.cargo.name} has the following invalid parameters: {join_quote(self.invalid_params)}") return params - def log_summary(self, level, title, color=None): + def log_summary(self, level, title, color=None, ignore_missing=False): extra = dict(color=color, boldface=True) if self.log.isEnabledFor(level): self.log.log(level, f"### {title}", extra=extra) del extra['boldface'] - for line in self.summary(recursive=False): + for line in self.summary(recursive=False, ignore_missing=ignore_missing): self.log.log(level, line, extra=extra) def run(self, subst=None, batch=None): @@ -225,7 +226,7 @@ def run(self, subst=None, batch=None): # log inputs if validated and not self.skip: - self.log_summary(logging.INFO, "validated inputs", color="GREEN") + self.log_summary(logging.INFO, "validated inputs", color="GREEN", ignore_missing=True) if subst is not None: subst.current = params @@ -283,7 +284,7 @@ def run(self, subst=None, batch=None): self.validated_params.update(**params) if subst is not None: subst.current._merge_(params) - self.log_summary(logging.DEBUG, "validated outputs") + self.log_summary(logging.DEBUG, "validated outputs", ignore_missing=True) # bomb out if an output was invalid invalid = [name for name in self.invalid_params + self.unresolved_params if name in self.cargo.outputs] @@ -843,11 +844,12 @@ def _link_steps(self): step.next_step = None step.previous_step = steps[i-2] - def summary(self, params: Dict[str, Any], recursive=True): + def summary(self, params: Dict[str, Any], recursive=True, ignore_missing=False): """Returns list of lines with a summary of the recipe state """ - lines = [f"recipe '{self.name}':"] + [f" {name} = {value}" for name, value in params.items()] + \ - [f" {name} = ???" for name in self.inputs_outputs if name not in params] + lines = [f"recipe '{self.name}':"] + [f" {name} = {value}" for name, value in params.items()] + if not ignore_missing: + lines += [f" {name} = ???" for name in self.inputs_outputs if name not in params] if recursive: lines.append(" steps:") for name, step in self.steps.items(): From a3472468fadd686db6f1fe47276f5f7c9f043ef1 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Fri, 18 Mar 2022 13:13:54 +0200 Subject: [PATCH 12/37] fixes #23 --- stimela/kitchen/recipe.py | 115 +++++++++++++++++--------------- stimela/tests/test_aliasing.yml | 7 +- stimela/tests/test_recipe.py | 2 +- 3 files changed, 69 insertions(+), 55 deletions(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index a723314c..e412c5f3 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -1,9 +1,6 @@ -import time -import copy from multiprocessing import cpu_count -import os, os.path, re, logging +import os, os.path, re, logging, fnmatch, copy from typing import Any, Tuple, List, Dict, Optional, Union -from enum import Enum from dataclasses import dataclass from omegaconf import MISSING, OmegaConf, DictConfig, ListConfig from collections import OrderedDict @@ -25,6 +22,7 @@ from scabha.cargo import Parameter, Cargo, Cab, Batch from scabha.types import File, Directory, MS + from . import runners Conditional = Optional[str] @@ -514,61 +512,72 @@ class AliasInfo(object): def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): if type(alias_target) is str: - step_label, step_param_name = alias_target.split('.', 1) - step = self.steps.get(step_label) - else: - step, step_label, step_param_name = alias_target - - if step is None: - raise RecipeValidationError(f"alias '{alias_name}' refers to unknown step '{step_label}'", log=self.log) - # is the alias already defined - existing_alias = self._alias_list.get(alias_name, [None])[0] - # find it in inputs or outputs - input_schema = step.inputs.get(step_param_name) - output_schema = step.outputs.get(step_param_name) - schema = input_schema or output_schema - if schema is None: - raise RecipeValidationError(f"alias '{alias_name}' refers to unknown step parameter '{step_label}.{step_param_name}'", log=self.log) - # implicit inuts cannot be aliased - if input_schema and input_schema.implicit: - raise RecipeValidationError(f"alias '{alias_name}' refers to implicit input '{step_label}.{step_param_name}'", log=self.log) - # if alias is already defined, check for conflicts - if existing_alias is not None: - io = existing_alias.io - if io is self.outputs: - raise RecipeValidationError(f"output alias '{alias_name}' is defined more than once", log=self.log) - elif output_schema: - raise RecipeValidationError(f"alias '{alias_name}' refers to both an input and an output", log=self.log) - alias_schema = io[alias_name] - # now we know it's a multiply-defined input, check for type consistency - if alias_schema.dtype != schema.dtype: - raise RecipeValidationError(f"alias '{alias_name}': dtype {schema.dtype} of '{step_label}.{step_param_name}' doesn't match previous dtype {existing_schema.dtype}", log=self.log) - - # else alias not yet defined, insert a schema + step_spec, step_param_name = alias_target.split('.', 1) + + # treat label as a "(cabtype)" specifier? + if re.match('^\(.+\)$', step_spec): + steps = [(label, step) for label, step in self.steps.items() if isinstance(step.cargo, Cab) and step.cab == step_spec[1:-1]] + # treat label as a wildcard? + elif any(ch in step_spec for ch in '*?['): + steps = [(label, step) for label, step in self.steps.items() if fnmatch.fnmatchcase(label, step_spec)] + # else treat label as a specific step name + else: + steps = [(step_spec, self.steps.get(step_spec))] else: - io = self.inputs if input_schema else self.outputs - io[alias_name] = copy.copy(schema) - alias_schema = io[alias_name] # make copy of schema object + step, step_spec, step_param_name = alias_target + steps = [(step_spec, step)] + + for (step_label, step) in steps: + if step is None: + raise RecipeValidationError(f"alias '{alias_name}' refers to unknown step '{step_label}'", log=self.log) + # is the alias already defined + existing_alias = self._alias_list.get(alias_name, [None])[0] + # find it in inputs or outputs + input_schema = step.inputs.get(step_param_name) + output_schema = step.outputs.get(step_param_name) + schema = input_schema or output_schema + if schema is None: + raise RecipeValidationError(f"alias '{alias_name}' refers to unknown step parameter '{step_label}.{step_param_name}'", log=self.log) + # implicit inuts cannot be aliased + if input_schema and input_schema.implicit: + raise RecipeValidationError(f"alias '{alias_name}' refers to implicit input '{step_label}.{step_param_name}'", log=self.log) + # if alias is already defined, check for conflicts + if existing_alias is not None: + io = existing_alias.io + if io is self.outputs: + raise RecipeValidationError(f"output alias '{alias_name}' is defined more than once", log=self.log) + elif output_schema: + raise RecipeValidationError(f"alias '{alias_name}' refers to both an input and an output", log=self.log) + alias_schema = io[alias_name] + # now we know it's a multiply-defined input, check for type consistency + if alias_schema.dtype != schema.dtype: + raise RecipeValidationError(f"alias '{alias_name}': dtype {schema.dtype} of '{step_label}.{step_param_name}' doesn't match previous dtype {existing_schema.dtype}", log=self.log) + + # else alias not yet defined, insert a schema + else: + io = self.inputs if input_schema else self.outputs + io[alias_name] = copy.copy(schema) + alias_schema = io[alias_name] # make copy of schema object - # if step parameter is implicit, mark the alias as implicit. Note that this only applies to outputs - if schema.implicit: - alias_schema.implicit = Unresolved(f"{step_label}.{step_param_name}") # will be resolved when propagated from step - self._implicit_params.add(alias_name) + # if step parameter is implicit, mark the alias as implicit. Note that this only applies to outputs + if schema.implicit: + alias_schema.implicit = Unresolved(f"{step_label}.{step_param_name}") # will be resolved when propagated from step + self._implicit_params.add(alias_name) - # this is True if the step's parameter is defined in any way (set, default, or implicit) - have_step_param = step_param_name in step.params or step_param_name in step.cargo.defaults or \ - schema.default is not None or schema.implicit is not None + # this is True if the step's parameter is defined in any way (set, default, or implicit) + have_step_param = step_param_name in step.params or step_param_name in step.cargo.defaults or \ + schema.default is not None or schema.implicit is not None - # if the step parameter is set, mark our schema as having a default - if have_step_param: - alias_schema.default = DeferredAlias(f"{step_label}.{step_param_name}") + # if the step parameter is set, mark our schema as having a default + if have_step_param: + alias_schema.default = DeferredAlias(f"{step_label}.{step_param_name}") - # alias becomes required if any step parameter it refers to was required, but wasn't already set - if schema.required and not have_step_param: - alias_schema.required = True + # alias becomes required if any step parameter it refers to was required, but wasn't already set + if schema.required and not have_step_param: + alias_schema.required = True - self._alias_map[step_label, step_param_name] = alias_name - self._alias_list.setdefault(alias_name, []).append(Recipe.AliasInfo(step_label, step, step_param_name, io)) + self._alias_map[step_label, step_param_name] = alias_name + self._alias_list.setdefault(alias_name, []).append(Recipe.AliasInfo(step_label, step, step_param_name, io)) def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): if not self.finalized: diff --git a/stimela/tests/test_aliasing.yml b/stimela/tests/test_aliasing.yml index 8f129b58..8c9d4068 100644 --- a/stimela/tests/test_aliasing.yml +++ b/stimela/tests/test_aliasing.yml @@ -5,7 +5,6 @@ cabs: policies: skip_implicits: false key_value: true - # simms inputs: a: dtype: str @@ -16,6 +15,10 @@ cabs: dtype: str d: implicit: "d_is_implicit" + e: + dtype: str + f: + dtype: str outputs: out: dtype: File @@ -39,6 +42,8 @@ recipe: b: [s1.b, s2.b] out: [s4.out] out2: [s3.out2] + e: ['s[12].e'] + f: ['(echo).f'] steps: s1: diff --git a/stimela/tests/test_recipe.py b/stimela/tests/test_recipe.py index d9b3e58c..97782f6d 100644 --- a/stimela/tests/test_recipe.py +++ b/stimela/tests/test_recipe.py @@ -28,7 +28,7 @@ def test_test_aliasing(): assert retcode != 0 print("===== expecting no errors now =====") - retcode, output = run("stimela -v exec test_aliasing.yml a=1 s3_a=1 s4_a=1") + retcode, output = run("stimela -v exec test_aliasing.yml a=1 s3_a=1 s4_a=1 e=e f=f") assert retcode == 0 print(output) assert verify_output(output, From 5b501ff1840209a3e735dd86fc8603ef267620ef Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Fri, 18 Mar 2022 13:22:59 +0200 Subject: [PATCH 13/37] fixes #23 but allows failed lookups for wildcards --- stimela/kitchen/recipe.py | 8 ++++++-- stimela/tests/test_aliasing.yml | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index e412c5f3..7c2d512e 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -511,15 +511,16 @@ class AliasInfo(object): from_step: bool = False # if True, value propagates from step down to recipe def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): + wildcards = False if type(alias_target) is str: step_spec, step_param_name = alias_target.split('.', 1) - # treat label as a "(cabtype)" specifier? if re.match('^\(.+\)$', step_spec): steps = [(label, step) for label, step in self.steps.items() if isinstance(step.cargo, Cab) and step.cab == step_spec[1:-1]] # treat label as a wildcard? elif any(ch in step_spec for ch in '*?['): steps = [(label, step) for label, step in self.steps.items() if fnmatch.fnmatchcase(label, step_spec)] + wildcards = True # else treat label as a specific step name else: steps = [(step_spec, self.steps.get(step_spec))] @@ -537,7 +538,10 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): output_schema = step.outputs.get(step_param_name) schema = input_schema or output_schema if schema is None: - raise RecipeValidationError(f"alias '{alias_name}' refers to unknown step parameter '{step_label}.{step_param_name}'", log=self.log) + if wildcards: + continue + else: + raise RecipeValidationError(f"alias '{alias_name}' refers to unknown step parameter '{step_label}.{step_param_name}'", log=self.log) # implicit inuts cannot be aliased if input_schema and input_schema.implicit: raise RecipeValidationError(f"alias '{alias_name}' refers to implicit input '{step_label}.{step_param_name}'", log=self.log) diff --git a/stimela/tests/test_aliasing.yml b/stimela/tests/test_aliasing.yml index 8c9d4068..dfdbaa4e 100644 --- a/stimela/tests/test_aliasing.yml +++ b/stimela/tests/test_aliasing.yml @@ -44,6 +44,7 @@ recipe: out2: [s3.out2] e: ['s[12].e'] f: ['(echo).f'] + g: ['xx*.g'] steps: s1: From ccbb82c7d4605f2181360763c52d733fde732023 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Fri, 18 Mar 2022 14:00:55 +0200 Subject: [PATCH 14/37] ensure assign_based_on lookups are converted to string --- stimela/kitchen/recipe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 7c2d512e..64bf4758 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -406,11 +406,11 @@ def update_assignments(self, assign, assign_based_on, params, location=""): for basevar, value_list in assign_based_on.items(): # make sure the base variable is defined if basevar in assign: - value = assign[basevar] + value = str(assign[basevar]) elif basevar in params: - value = params[basevar] + value = str(params[basevar]) elif basevar in self.inputs_outputs and self.inputs_outputs[basevar].default is not None: - value = self.inputs_outputs[basevar].default + value = str(self.inputs_outputs[basevar].default) else: raise AssignmentError(f"{location}.assign_based_on.{basevar} is an unset variable or parameter") # look up list of assignments From 95301d28d94b8f91c1d268764e8ec5c6f4fe4689 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Fri, 18 Mar 2022 16:52:06 +0200 Subject: [PATCH 15/37] aias with a default at recipe level should override step default --- stimela/kitchen/recipe.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 64bf4758..7cfc5f23 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -560,8 +560,14 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): # else alias not yet defined, insert a schema else: io = self.inputs if input_schema else self.outputs + # having an own default in the schema overrides the step's default + own_default = None + if alias_name in io and io[alias_name].default is not None: + own_default = io[alias_name].default io[alias_name] = copy.copy(schema) alias_schema = io[alias_name] # make copy of schema object + if own_default is not None: + alias_schema.default = own_default # if step parameter is implicit, mark the alias as implicit. Note that this only applies to outputs if schema.implicit: @@ -572,8 +578,8 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): have_step_param = step_param_name in step.params or step_param_name in step.cargo.defaults or \ schema.default is not None or schema.implicit is not None - # if the step parameter is set, mark our schema as having a default - if have_step_param: + # if the step parameter is set and ours isn't, mark our schema as having a default + if have_step_param and alias_schema.default is not None: alias_schema.default = DeferredAlias(f"{step_label}.{step_param_name}") # alias becomes required if any step parameter it refers to was required, but wasn't already set From eebbed1f76bf523a3360e2f075bb3610a9ab4700 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Sat, 19 Mar 2022 12:54:59 +0200 Subject: [PATCH 16/37] switched to asyncio for xrun --- setup.py | 2 +- stimela/utils/__init__.py | 2 +- stimela/utils/xrun_asyncio.py | 104 ++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 2 deletions(-) create mode 100644 stimela/utils/xrun_asyncio.py diff --git a/setup.py b/setup.py index 24c96e74..6664a1f9 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ url="https://github.com/ratt-ru/Stimela", packages=find_packages(), include_package_data=True, - python_requires='>=3.6', + python_requires='>=3.7', install_requires=requirements, entry_points=""" [console_scripts] diff --git a/stimela/utils/__init__.py b/stimela/utils/__init__.py index 19c68208..f58cd62b 100644 --- a/stimela/utils/__init__.py +++ b/stimela/utils/__init__.py @@ -16,7 +16,7 @@ CPUS = 1 -from .xrun_poll import xrun +from .xrun_asyncio import xrun def assign(key, value): frame = inspect.currentframe().f_back diff --git a/stimela/utils/xrun_asyncio.py b/stimela/utils/xrun_asyncio.py new file mode 100644 index 00000000..62060f9e --- /dev/null +++ b/stimela/utils/xrun_asyncio.py @@ -0,0 +1,104 @@ +import traceback, subprocess, errno, re, time, logging, os, sys, signal +import asyncio +from .xrun_poll import get_stimela_logger, dispatch_to_log, xrun_nolog +from . import StimelaCabRuntimeError, StimelaProcessRuntimeError + +DEBUG = 0 + +log = None + + +async def stream_reader(stream, line_handler, exit_handler): + while not stream.at_eof(): + line = await stream.readline() + line = (line.decode('utf-8') if type(line) is bytes else line).rstrip() + line_handler(line) + # Finished + exit_handler() + + +async def xrun_impl(command, options, log=None, env=None, timeout=-1, kill_callback=None, output_wrangler=None, shell=True, return_errcode=False, command_name=None): + command_name = command_name or command + + # this part could be inside the container + command_line = " ".join([command] + list(map(str, options))) + if shell: + command_line = " ".join([command] + list(map(str, options))) + command = [command_line] + else: + command = [command] + list(map(str, options)) + command_line = " ".join(command) + + log = log or get_stimela_logger() + + if log is None: + return xrun_nolog(command, name=command_name, shell=shell) + + # this part is never inside the container + import stimela + + log = log or stimela.logger() + + log.info("running " + command_line, extra=dict(stimela_subprocess_output=(command_name, "start"))) + + start_time = time.time() + + proc = await asyncio.create_subprocess_exec(*command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE) + + def line_dispatcher(line, stream_name): + dispatch_to_log(log, line, command_name, stream_name="stderr", output_wrangler=output_wrangler) + + results = await asyncio.gather( + asyncio.create_task(proc.wait()), + asyncio.create_task(stream_reader(proc.stdout, lambda line:line_dispatcher(line, "stdout"))), + asyncio.create_task(stream_reader(proc.stderr, lambda line:line_dispatcher(line, "stderr"))), + return_exceptions=True + ) + status = proc.returncode + + for result in results: + if isinstance(result, SystemExit): + raise StimelaCabRuntimeError(f"{command_name}: SystemExit with code {status}", log=log) + elif isinstance(result, KeyboardInterrupt): + if callable(kill_callback): + log.warning(f"Ctrl+C caught: shutting down {command_name} process, please give it a few moments") + kill_callback() + log.info(f"the {command_name} process was shut down successfully", + extra=dict(stimela_subprocess_output=(command_name, "status"))) + await proc.wait() + else: + log.warning(f"Ctrl+C caught, interrupting {command_name} process {proc.pid}") + proc.send_signal(signal.SIGINT) + for retry in range(10): + if retry: + log.info(f"Process {proc.pid} not exited after {retry} seconds, waiting a bit longer...") + try: + await proc.wait(1) + log.info(f"Process {proc.pid} has exited with return code {proc.returncode}") + break + except subprocess.TimeoutExpired as exc: + if retry == 4: + log.warning(f"Terminating process {proc.pid}") + proc.terminate() + else: + log.warning(f"Killing process {proc.pid}") + proc.kill() + await proc.wait() + raise StimelaCabRuntimeError(f"{command_name} interrupted with Ctrl+C") + + elif isinstance(result, Exception): + traceback.print_exc() + await proc.wait() + raise StimelaCabRuntimeError(f"{command_name} threw exception: {exc}'", log=log) + + if status and not return_errcode: + raise StimelaCabRuntimeError(f"{command_name} returns error code {status}") + + return status + +async def xrun(command, options, log=None, env=None, timeout=-1, kill_callback=None, output_wrangler=None, shell=True, return_errcode=False, command_name=None): + return asyncio.run(xrun_impl(command, options, log=log, env=env, + timeout=timeout, kill_callback=kill_callback, output_wrangler=output_wrangler, + shell=shell, return_errcode=return_errcode, command_name=command_name)) From 0b5072acde8a3464bca645a5ccf551f38e56b21b Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Sat, 19 Mar 2022 13:08:48 +0200 Subject: [PATCH 17/37] name stream properly --- setup.py | 2 +- stimela/utils/xrun_asyncio.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 6664a1f9..24c96e74 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ url="https://github.com/ratt-ru/Stimela", packages=find_packages(), include_package_data=True, - python_requires='>=3.7', + python_requires='>=3.6', install_requires=requirements, entry_points=""" [console_scripts] diff --git a/stimela/utils/xrun_asyncio.py b/stimela/utils/xrun_asyncio.py index 62060f9e..303fb252 100644 --- a/stimela/utils/xrun_asyncio.py +++ b/stimela/utils/xrun_asyncio.py @@ -48,7 +48,7 @@ async def xrun_impl(command, options, log=None, env=None, timeout=-1, kill_callb stderr=asyncio.subprocess.PIPE) def line_dispatcher(line, stream_name): - dispatch_to_log(log, line, command_name, stream_name="stderr", output_wrangler=output_wrangler) + dispatch_to_log(log, line, command_name, stream_name, output_wrangler=output_wrangler) results = await asyncio.gather( asyncio.create_task(proc.wait()), From 7414e8077439cde22d0659bd4bb397bb45f1c3d0 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Sat, 19 Mar 2022 14:07:17 +0200 Subject: [PATCH 18/37] added another sample recipe --- stimela/tests/test_xrun.yml | 74 +++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 stimela/tests/test_xrun.yml diff --git a/stimela/tests/test_xrun.yml b/stimela/tests/test_xrun.yml new file mode 100644 index 00000000..d264cb14 --- /dev/null +++ b/stimela/tests/test_xrun.yml @@ -0,0 +1,74 @@ +cabs: + sleep: + command: sleep + inputs: + seconds: + dtype: int + required: true + policies: + positional: true + help: + command: wsclean + + shell: + command: '{current.command} {current.args}' + policies: + skip: true + inputs: + command: + dtype: str + required: true + args: + dtype: str + required: true + + fileops: + command: '{current.command}' + policies: + positional: true + inputs: + command: + choices: + - cp + - mv + policies: + skip: true + src: + dtype: File + required: true + must_exist: false + dest: + dtype: Union[File, Directory] + required: true + must_exist: false + +opts: + log: + dir: test-logs/logs-{config.run.datetime} + nest: 3 + symlink: logs + + +recipe: + name: "demo recipe" + info: 'top level recipe definition' + + steps: + echo: + cab: shell + params: + command: echo + args: "1 2 3 4 5" + + # help: + # cab: help + # slp1: + # cab: sleep + # params: + # seconds: 5 + cp: + cab: fileops + params: + command: cp + src: a + dest: a From 4a5bc4c6c896657858a1ae5ccbb32fdd3dcbd2c2 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Sun, 20 Mar 2022 14:11:09 +0200 Subject: [PATCH 19/37] got asyncio to work, plus CPU reporting --- setup.py | 1 + stimela/backends/native.py | 2 +- stimela/tests/test_xrun.yml | 8 +-- stimela/utils/xrun_asyncio.py | 128 +++++++++++++++++++--------------- 4 files changed, 79 insertions(+), 60 deletions(-) diff --git a/setup.py b/setup.py index 24c96e74..bc7e4b3c 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,7 @@ "click", "pydantic", "pathos", + "psutil" ], PACKAGE_NAME = "stimela" diff --git a/stimela/backends/native.py b/stimela/backends/native.py index fba72fdf..d5e0e096 100644 --- a/stimela/backends/native.py +++ b/stimela/backends/native.py @@ -7,7 +7,7 @@ from scabha.cargo import Cab, Parameter from stimela import logger -from stimela.utils.xrun_poll import xrun, dispatch_to_log +from stimela.utils.xrun_asyncio import xrun, dispatch_to_log from stimela.exceptions import StimelaCabRuntimeError import click diff --git a/stimela/tests/test_xrun.yml b/stimela/tests/test_xrun.yml index d264cb14..0edf23d3 100644 --- a/stimela/tests/test_xrun.yml +++ b/stimela/tests/test_xrun.yml @@ -62,10 +62,10 @@ recipe: # help: # cab: help - # slp1: - # cab: sleep - # params: - # seconds: 5 + slp1: + cab: sleep + params: + seconds: 5 cp: cab: fileops params: diff --git a/stimela/utils/xrun_asyncio.py b/stimela/utils/xrun_asyncio.py index 303fb252..5a13a135 100644 --- a/stimela/utils/xrun_asyncio.py +++ b/stimela/utils/xrun_asyncio.py @@ -1,5 +1,6 @@ import traceback, subprocess, errno, re, time, logging, os, sys, signal import asyncio +import psutil from .xrun_poll import get_stimela_logger, dispatch_to_log, xrun_nolog from . import StimelaCabRuntimeError, StimelaProcessRuntimeError @@ -8,16 +9,8 @@ log = None -async def stream_reader(stream, line_handler, exit_handler): - while not stream.at_eof(): - line = await stream.readline() - line = (line.decode('utf-8') if type(line) is bytes else line).rstrip() - line_handler(line) - # Finished - exit_handler() - -async def xrun_impl(command, options, log=None, env=None, timeout=-1, kill_callback=None, output_wrangler=None, shell=True, return_errcode=False, command_name=None): +def xrun(command, options, log=None, env=None, timeout=-1, kill_callback=None, output_wrangler=None, shell=True, return_errcode=False, command_name=None): command_name = command_name or command # this part could be inside the container @@ -43,62 +36,87 @@ async def xrun_impl(command, options, log=None, env=None, timeout=-1, kill_callb start_time = time.time() - proc = await asyncio.create_subprocess_exec(*command, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE) - - def line_dispatcher(line, stream_name): - dispatch_to_log(log, line, command_name, stream_name, output_wrangler=output_wrangler) - - results = await asyncio.gather( - asyncio.create_task(proc.wait()), - asyncio.create_task(stream_reader(proc.stdout, lambda line:line_dispatcher(line, "stdout"))), - asyncio.create_task(stream_reader(proc.stderr, lambda line:line_dispatcher(line, "stderr"))), - return_exceptions=True - ) - status = proc.returncode - - for result in results: - if isinstance(result, SystemExit): - raise StimelaCabRuntimeError(f"{command_name}: SystemExit with code {status}", log=log) - elif isinstance(result, KeyboardInterrupt): - if callable(kill_callback): - log.warning(f"Ctrl+C caught: shutting down {command_name} process, please give it a few moments") - kill_callback() - log.info(f"the {command_name} process was shut down successfully", - extra=dict(stimela_subprocess_output=(command_name, "status"))) - await proc.wait() - else: - log.warning(f"Ctrl+C caught, interrupting {command_name} process {proc.pid}") - proc.send_signal(signal.SIGINT) + loop = asyncio.get_event_loop() + + proc = loop.run_until_complete( + asyncio.create_subprocess_exec(*command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE)) + + async def stream_reader(stream, stream_name): + while not stream.at_eof(): + line = await stream.readline() + line = (line.decode('utf-8') if type(line) is bytes else line).rstrip() + if line or not stream.at_eof(): + dispatch_to_log(log, line, command_name, stream_name, output_wrangler=output_wrangler) + + async def cpu_reporter_impl(period): + while True: + await asyncio.sleep(period) + usage = psutil.cpu_percent() + log.info(f"Current CPU usage is {usage}%") + + async def wrap_cancellable(job): + try: + return await job + except asyncio.CancelledError as exc: + return None + + async def proc_awaiter(proc, *cancellables): + await proc.wait() + for task in cancellables: + task.cancel() + + reporter = asyncio.Task(cpu_reporter_impl(5)) + + try: + job = asyncio.gather( + proc_awaiter(proc, reporter), + stream_reader(proc.stdout, "stdout"), + stream_reader(proc.stderr, "stderr"), + wrap_cancellable(reporter) + ) + results = loop.run_until_complete(job) + status = proc.returncode + except SystemExit as exc: + loop.run_until_complete(proc.wait()) + except KeyboardInterrupt: + if callable(kill_callback): + log.warning(f"Ctrl+C caught: shutting down {command_name} process, please give it a few moments") + kill_callback() + log.info(f"the {command_name} process was shut down successfully", + extra=dict(stimela_subprocess_output=(command_name, "status"))) + loop.run_until_complete(proc.wait()) + else: + log.warning(f"Ctrl+C caught, interrupting {command_name} process {proc.pid}") + proc.send_signal(signal.SIGINT) + + async def wait_on_process(proc): for retry in range(10): - if retry: - log.info(f"Process {proc.pid} not exited after {retry} seconds, waiting a bit longer...") - try: - await proc.wait(1) + await asyncio.sleep(1) + if proc.returncode is not None: log.info(f"Process {proc.pid} has exited with return code {proc.returncode}") break - except subprocess.TimeoutExpired as exc: - if retry == 4: - log.warning(f"Terminating process {proc.pid}") - proc.terminate() + if retry == 5: + log.warning(f"Process {proc.pid} not exited after {retry} seconds, will tyr to terminate it") + proc.terminate() + else: + log.info(f"Process {proc.pid} not exited after {retry} seconds, waiting a bit longer...") else: log.warning(f"Killing process {proc.pid}") proc.kill() - await proc.wait() - raise StimelaCabRuntimeError(f"{command_name} interrupted with Ctrl+C") + + loop.run_until_complete(wait_on_process(proc)) + + raise StimelaCabRuntimeError(f"{command_name} interrupted with Ctrl+C") - elif isinstance(result, Exception): - traceback.print_exc() - await proc.wait() - raise StimelaCabRuntimeError(f"{command_name} threw exception: {exc}'", log=log) + except Exception as exc: + traceback.print_exc() + loop.run_until_complete(proc.wait()) + raise StimelaCabRuntimeError(f"{command_name} threw exception: {exc}'", log=log) if status and not return_errcode: raise StimelaCabRuntimeError(f"{command_name} returns error code {status}") return status -async def xrun(command, options, log=None, env=None, timeout=-1, kill_callback=None, output_wrangler=None, shell=True, return_errcode=False, command_name=None): - return asyncio.run(xrun_impl(command, options, log=log, env=env, - timeout=timeout, kill_callback=kill_callback, output_wrangler=output_wrangler, - shell=shell, return_errcode=return_errcode, command_name=command_name)) From 214716418ec4f9078c6922c1312ee5f6f399eb1e Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Sun, 20 Mar 2022 14:21:52 +0200 Subject: [PATCH 20/37] CPU reporting in blue colour --- stimela/stimelogging.py | 1 + stimela/utils/xrun_asyncio.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/stimela/stimelogging.py b/stimela/stimelogging.py index f244e1d6..c7a55a62 100644 --- a/stimela/stimelogging.py +++ b/stimela/stimelogging.py @@ -43,6 +43,7 @@ class ConsoleColors(): DIM = '\033[2m' if sys.stdin.isatty() else '' GREEN = '\033[92m' if sys.stdin.isatty() else '' YELLOW = '\033[93m' if sys.stdin.isatty() else '' + BLUE = '\033[94m' if sys.stdin.isatty() else '' WHITE = '\033[39m' if sys.stdin.isatty() else '' ENDC = '\033[0m' if sys.stdin.isatty() else '' diff --git a/stimela/utils/xrun_asyncio.py b/stimela/utils/xrun_asyncio.py index 5a13a135..fd9a44f2 100644 --- a/stimela/utils/xrun_asyncio.py +++ b/stimela/utils/xrun_asyncio.py @@ -54,7 +54,7 @@ async def cpu_reporter_impl(period): while True: await asyncio.sleep(period) usage = psutil.cpu_percent() - log.info(f"Current CPU usage is {usage}%") + log.info(f"Current CPU usage is {usage}%", extra=dict(color="BLUE")) async def wrap_cancellable(job): try: From 3f992575152c201a5ad3e0111d1031271f3cfeff Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Mon, 21 Mar 2022 16:36:02 +0200 Subject: [PATCH 21/37] added CPU/RAM monitor --- setup.py | 3 +- stimela/backends/native.py | 2 +- stimela/stimelogging.py | 6 +- stimela/utils/xrun_asyncio.py | 186 ++++++++++++++++++++-------------- stimela/utils/xrun_poll.py | 4 +- 5 files changed, 121 insertions(+), 80 deletions(-) diff --git a/setup.py b/setup.py index bc7e4b3c..8549a2f8 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,8 @@ "click", "pydantic", "pathos", - "psutil" + "psutil", + "rich" ], PACKAGE_NAME = "stimela" diff --git a/stimela/backends/native.py b/stimela/backends/native.py index d5e0e096..296ec96a 100644 --- a/stimela/backends/native.py +++ b/stimela/backends/native.py @@ -159,7 +159,7 @@ def run_command(cab: Cab, params: Dict[str, Any], log, subst: Optional[Dict[str, retcode = xrun(args[0], args[1:], shell=False, log=log, output_wrangler=cab.apply_output_wranglers, - return_errcode=True, command_name=command_name) + return_errcode=True, command_name=command_name, progress_bar=True) # if retcode is not zero, raise error, unless cab declared itself a success (via the wrangler) if retcode: diff --git a/stimela/stimelogging.py b/stimela/stimelogging.py index c7a55a62..9e2df123 100644 --- a/stimela/stimelogging.py +++ b/stimela/stimelogging.py @@ -13,7 +13,11 @@ def __init__(self, info_stream=sys.stdout, err_stream=sys.stderr): self.multiplex = True def emit(self, record): - handler = self.err_handler if record.levelno > logging.INFO and self.multiplex else self.info_handler + # does record come with its own handler? Rather use that + if hasattr(record, 'custom_console_handler'): + handler = record.custom_console_handler + else: + handler = self.err_handler if record.levelno > logging.INFO and self.multiplex else self.info_handler handler.emit(record) # ignore broken pipes, this often happens when cleaning up and exiting try: diff --git a/stimela/utils/xrun_asyncio.py b/stimela/utils/xrun_asyncio.py index fd9a44f2..0cbc340b 100644 --- a/stimela/utils/xrun_asyncio.py +++ b/stimela/utils/xrun_asyncio.py @@ -1,6 +1,13 @@ import traceback, subprocess, errno, re, time, logging, os, sys, signal import asyncio import psutil +import rich +import rich.highlighter +from rich.style import Style +from rich.table import Column +from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn +from rich.logging import RichHandler + from .xrun_poll import get_stimela_logger, dispatch_to_log, xrun_nolog from . import StimelaCabRuntimeError, StimelaProcessRuntimeError @@ -10,7 +17,9 @@ -def xrun(command, options, log=None, env=None, timeout=-1, kill_callback=None, output_wrangler=None, shell=True, return_errcode=False, command_name=None): +def xrun(command, options, log=None, env=None, timeout=-1, kill_callback=None, output_wrangler=None, shell=True, + return_errcode=False, command_name=None, progress_bar=False): + command_name = command_name or command # this part could be inside the container @@ -36,84 +45,109 @@ def xrun(command, options, log=None, env=None, timeout=-1, kill_callback=None, o start_time = time.time() - loop = asyncio.get_event_loop() - - proc = loop.run_until_complete( - asyncio.create_subprocess_exec(*command, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE)) - - async def stream_reader(stream, stream_name): - while not stream.at_eof(): - line = await stream.readline() - line = (line.decode('utf-8') if type(line) is bytes else line).rstrip() - if line or not stream.at_eof(): - dispatch_to_log(log, line, command_name, stream_name, output_wrangler=output_wrangler) - - async def cpu_reporter_impl(period): - while True: - await asyncio.sleep(period) - usage = psutil.cpu_percent() - log.info(f"Current CPU usage is {usage}%", extra=dict(color="BLUE")) - - async def wrap_cancellable(job): + def render_process_status(): + cpu = psutil.cpu_percent() + mem = psutil.virtual_memory() + used = round(mem.used / 2**30) + total = round(mem.total / 2**30) + return f"CPU [green]{cpu}%[/green] RAM [green]{used}[/green]/[green]{total}[/green]G" + + with Progress( + SpinnerColumn(), + f"running {command_name}", + TimeElapsedColumn(table_column=Column(style="blue")), + "{task.description}", + refresh_per_second=2, + transient=True) as progress: + + if progress_bar: + progress_task = progress.add_task(render_process_status()) + log_handler = RichHandler(console=progress, + highlighter=rich.highlighter.NullHighlighter(), + show_level=False, show_path=False, show_time=False) + else: + progress_task = log_handler = None + + loop = asyncio.get_event_loop() + + proc = loop.run_until_complete( + asyncio.create_subprocess_exec(*command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE)) + + async def stream_reader(stream, stream_name): + while not stream.at_eof(): + line = await stream.readline() + line = (line.decode('utf-8') if type(line) is bytes else line).rstrip() + if line or not stream.at_eof(): + dispatch_to_log(log, line, command_name, stream_name, output_wrangler=output_wrangler, custom_console_handler=log_handler) + + async def cpu_reporter_impl(period): + while True: + await asyncio.sleep(period) + progress_bar and progress.update(progress_task, description=render_process_status()) + # progress.print(f"Current CPU usage is {usage}%") + + async def wrap_cancellable(job): + try: + return await job + except asyncio.CancelledError as exc: + return None + + async def proc_awaiter(proc, *cancellables): + await proc.wait() + for task in cancellables: + task.cancel() + + reporter = asyncio.Task(cpu_reporter_impl(1)) + try: - return await job - except asyncio.CancelledError as exc: - return None - - async def proc_awaiter(proc, *cancellables): - await proc.wait() - for task in cancellables: - task.cancel() - - reporter = asyncio.Task(cpu_reporter_impl(5)) - - try: - job = asyncio.gather( - proc_awaiter(proc, reporter), - stream_reader(proc.stdout, "stdout"), - stream_reader(proc.stderr, "stderr"), - wrap_cancellable(reporter) - ) - results = loop.run_until_complete(job) - status = proc.returncode - except SystemExit as exc: - loop.run_until_complete(proc.wait()) - except KeyboardInterrupt: - if callable(kill_callback): - log.warning(f"Ctrl+C caught: shutting down {command_name} process, please give it a few moments") - kill_callback() - log.info(f"the {command_name} process was shut down successfully", - extra=dict(stimela_subprocess_output=(command_name, "status"))) + job = asyncio.gather( + proc_awaiter(proc, reporter), + stream_reader(proc.stdout, "stdout"), + stream_reader(proc.stderr, "stderr"), + wrap_cancellable(reporter) + ) + results = loop.run_until_complete(job) + status = proc.returncode + except SystemExit as exc: loop.run_until_complete(proc.wait()) - else: - log.warning(f"Ctrl+C caught, interrupting {command_name} process {proc.pid}") - proc.send_signal(signal.SIGINT) - - async def wait_on_process(proc): - for retry in range(10): - await asyncio.sleep(1) - if proc.returncode is not None: - log.info(f"Process {proc.pid} has exited with return code {proc.returncode}") - break - if retry == 5: - log.warning(f"Process {proc.pid} not exited after {retry} seconds, will tyr to terminate it") - proc.terminate() + except KeyboardInterrupt: + progress.stop() + if callable(kill_callback): + log.warning(f"Ctrl+C caught: shutting down {command_name} process, please give it a few moments") + kill_callback() + log.info(f"the {command_name} process was shut down successfully", + extra=dict(stimela_subprocess_output=(command_name, "status"))) + loop.run_until_complete(proc.wait()) + else: + log.warning(f"Ctrl+C caught, interrupting {command_name} process {proc.pid}") + proc.send_signal(signal.SIGINT) + + async def wait_on_process(proc): + for retry in range(10): + await asyncio.sleep(1) + if proc.returncode is not None: + log.info(f"Process {proc.pid} has exited with return code {proc.returncode}") + break + if retry == 5: + log.warning(f"Process {proc.pid} not exited after {retry} seconds, will tyr to terminate it") + proc.terminate() + else: + log.info(f"Process {proc.pid} not exited after {retry} seconds, waiting a bit longer...") else: - log.info(f"Process {proc.pid} not exited after {retry} seconds, waiting a bit longer...") - else: - log.warning(f"Killing process {proc.pid}") - proc.kill() - - loop.run_until_complete(wait_on_process(proc)) - - raise StimelaCabRuntimeError(f"{command_name} interrupted with Ctrl+C") - - except Exception as exc: - traceback.print_exc() - loop.run_until_complete(proc.wait()) - raise StimelaCabRuntimeError(f"{command_name} threw exception: {exc}'", log=log) + log.warning(f"Killing process {proc.pid}") + proc.kill() + + loop.run_until_complete(wait_on_process(proc)) + + raise StimelaCabRuntimeError(f"{command_name} interrupted with Ctrl+C") + + except Exception as exc: + progress.stop() + traceback.print_exc() + loop.run_until_complete(proc.wait()) + raise StimelaCabRuntimeError(f"{command_name} threw exception: {exc}'", log=log) if status and not return_errcode: raise StimelaCabRuntimeError(f"{command_name} returns error code {status}") diff --git a/stimela/utils/xrun_poll.py b/stimela/utils/xrun_poll.py index 183c7874..dfa3b537 100644 --- a/stimela/utils/xrun_poll.py +++ b/stimela/utils/xrun_poll.py @@ -135,7 +135,7 @@ def xrun_nolog(command, name=None, shell=True): return 0 -def dispatch_to_log(log, line, command_name, stream_name, output_wrangler): +def dispatch_to_log(log, line, command_name, stream_name, output_wrangler, custom_console_handler=None): # dispatch output to log line = _remove_ctrls(line) extra = dict(stimela_subprocess_output=(command_name, stream_name)) @@ -143,6 +143,8 @@ def dispatch_to_log(log, line, command_name, stream_name, output_wrangler): severity = logging.INFO if stream_name == 'stderr': extra['color'] = 'WHITE' + if custom_console_handler is not None: + extra['custom_console_handler'] = custom_console_handler # feed through wrangler to adjust severity and content if output_wrangler is not None: line, severity = output_wrangler(line, severity) From ad3a3f11b50770b872bdc3da2c5027b898a4fd59 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Tue, 22 Mar 2022 11:15:37 +0200 Subject: [PATCH 22/37] improve mem meter --- stimela/utils/xrun_asyncio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stimela/utils/xrun_asyncio.py b/stimela/utils/xrun_asyncio.py index 0cbc340b..4c21aa78 100644 --- a/stimela/utils/xrun_asyncio.py +++ b/stimela/utils/xrun_asyncio.py @@ -48,7 +48,7 @@ def xrun(command, options, log=None, env=None, timeout=-1, kill_callback=None, o def render_process_status(): cpu = psutil.cpu_percent() mem = psutil.virtual_memory() - used = round(mem.used / 2**30) + used = round(mem.total*mem.percent/100 / 2**30) total = round(mem.total / 2**30) return f"CPU [green]{cpu}%[/green] RAM [green]{used}[/green]/[green]{total}[/green]G" From 1f15f3e8489a448d7aa46626ac320dee6de09341 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Tue, 22 Mar 2022 20:15:08 +0200 Subject: [PATCH 23/37] added rich progress bar with step names --- stimela/commands/run.py | 21 +++- stimela/kitchen/recipe.py | 206 +++++++++++++++++----------------- stimela/stimelogging.py | 81 ++++++++++++- stimela/tests/test_xrun.yml | 16 ++- stimela/utils/xrun_asyncio.py | 67 ++++------- 5 files changed, 228 insertions(+), 163 deletions(-) diff --git a/stimela/commands/run.py b/stimela/commands/run.py index a115d794..03377eee 100644 --- a/stimela/commands/run.py +++ b/stimela/commands/run.py @@ -6,10 +6,11 @@ from scabha import configuratt from scabha.exceptions import ScabhaBaseException from omegaconf.omegaconf import OmegaConf, OmegaConfBaseException +from stimela import stimelogging from stimela.config import ConfigExceptionTypes import click import logging -import os.path, yaml, sys +import os.path, yaml, sys, asyncio from typing import List, Optional import stimela from stimela import logger @@ -17,6 +18,18 @@ from stimela.kitchen.recipe import Recipe, Step, join_quote from stimela.config import get_config_class +# def print_schema_help(cargo_type, name, cargo): +# print( +# f""" +# {cargo_type} name: {name} +# Info: {cargo.info} +# Inputs: +# """) +# for name, schema in cargo.inputs: + + + + @cli.command("run", help=""" Execute a single cab, or a recipe from a YML file. @@ -39,9 +52,11 @@ help="""Sets skip=false on the given step(s). Use commas, or give multiple times for multiple steps.""") @click.option("-d", "--dry-run", is_flag=True, help="""Doesn't actually run anything, only prints the selected steps.""") +@click.option("-h", "--help", is_flag=True, + help="""Prints help on the selected runnable and its parameters, then exits.""") @click.argument("what", metavar="filename.yml|CAB") @click.argument("parameters", nargs=-1, metavar="[recipe name] [PARAM=VALUE] [X.Y.Z=FOO] ...", required=False) -def run(what: str, parameters: List[str] = [], dry_run: bool = False, +def run(what: str, parameters: List[str] = [], dry_run: bool = False, help: bool = False, step_names: List[str] = [], tags: List[str] = [], skip_tags: List[str] = [], enable_steps: List[str] = []): log = logger() @@ -133,6 +148,7 @@ def run(what: str, parameters: List[str] = [], dry_run: bool = False, sys.exit(2) else: if len(all_recipe_names) > 1: + print(f"This file contains the following recipes: {', '.join(all_recipe_names)}") log.error(f"multiple recipes found, please specify one on the command line") sys.exit(2) recipe_name = all_recipe_names[0] @@ -280,7 +296,6 @@ def run(what: str, parameters: List[str] = [], dry_run: bool = False, log.info("dry run was requested, exiting") sys.exit(0) - # run step try: outputs = outer_step.run() except ScabhaBaseException as exc: diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 7cfc5f23..0b722700 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -1,5 +1,5 @@ from multiprocessing import cpu_count -import os, os.path, re, logging, fnmatch, copy +import os, os.path, re, logging, fnmatch, copy, time from typing import Any, Tuple, List, Dict, Optional, Union from dataclasses import dataclass from omegaconf import MISSING, OmegaConf, DictConfig, ListConfig @@ -182,116 +182,118 @@ def run(self, subst=None, batch=None): if self.validated_params is None: self.prevalidate(self.params) - # Since prevalidation will have populated default values for potentially missing parameters, use those values - # For parameters that aren't missing, use whatever value that was suplied - params = self.validated_params.copy() - params.update(**self.params) - - # # However the unresolved ones should be - # params = self.validated_params - # for name, value in params.items(): - # if type(value) is Unresolved: - # params[name] = self.params[name] - # # params = self.params - - skip_warned = False # becomes True when warnings are given - - self.log.debug(f"validating inputs {subst and list(subst.keys())}") - validated = None - try: - params = self.cargo.validate_inputs(params, loosely=self.skip, subst=subst) - validated = True - - except ScabhaBaseException as exc: - level = logging.WARNING if self.skip else logging.ERROR - if not exc.logged: - if type(exc) is SubstitutionErrorList: - self.log.log(level, f"unresolved {{}}-substitution(s):") - for err in exc.errors: - self.log.log(level, f" {err}") - else: - self.log.log(level, f"error validating inputs: {exc}") - exc.logged = True - self.log_summary(level, "summary of inputs follows", color="WARNING") - # raise up, unless step is being skipped - if self.skip: - self.log.warning("since the step is being skipped, this is not fatal") - skip_warned = True - else: - raise + with stimelogging.declare_subtask(self.name) as subtask: - self.validated_params.update(**params) + # Since prevalidation will have populated default values for potentially missing parameters, use those values + # For parameters that aren't missing, use whatever value that was suplied + params = self.validated_params.copy() + params.update(**self.params) - # log inputs - if validated and not self.skip: - self.log_summary(logging.INFO, "validated inputs", color="GREEN", ignore_missing=True) - if subst is not None: - subst.current = params + # # However the unresolved ones should be + # params = self.validated_params + # for name, value in params.items(): + # if type(value) is Unresolved: + # params[name] = self.params[name] + # # params = self.params - # bomb out if some inputs failed to validate or substitutions resolve - if self.invalid_params or self.unresolved_params: - invalid = self.invalid_params + self.unresolved_params - if self.skip: - self.log.warning(f"invalid inputs: {join_quote(invalid)}") - if not skip_warned: - self.log.warning("since the step was skipped, this is not fatal") - skip_warned = True - else: - raise StepValidationError(f"invalid inputs: {join_quote(invalid)}", log=self.log) + skip_warned = False # becomes True when warnings are given - if not self.skip: + self.log.debug(f"validating inputs {subst and list(subst.keys())}") + validated = None try: - if type(self.cargo) is Recipe: - self.cargo.backend = self.cargo.backend or self.backend - self.cargo._run(params) - elif type(self.cargo) is Cab: - self.cargo.backend = self.cargo.backend or self.backend - runners.run_cab(self.cargo, params, log=self.log, subst=subst, batch=batch) - else: - raise RuntimeError("Unknown cargo type") + params = self.cargo.validate_inputs(params, loosely=self.skip, subst=subst) + validated = True + except ScabhaBaseException as exc: + level = logging.WARNING if self.skip else logging.ERROR if not exc.logged: - self.log.error(f"error running step: {exc}") + if type(exc) is SubstitutionErrorList: + self.log.log(level, f"unresolved {{}}-substitution(s):") + for err in exc.errors: + self.log.log(level, f" {err}") + else: + self.log.log(level, f"error validating inputs: {exc}") exc.logged = True - raise - - self.log.debug(f"validating outputs") - validated = False - - try: - params = self.cargo.validate_outputs(params, loosely=self.skip, subst=subst) - validated = True - except ScabhaBaseException as exc: - level = logging.WARNING if self.skip else logging.ERROR - if not exc.logged: - if type(exc) is SubstitutionErrorList: - self.log.log(level, f"unresolved {{}}-substitution(s):") - for err in exc.errors: - self.log.log(level, f" {err}") + self.log_summary(level, "summary of inputs follows", color="WARNING") + # raise up, unless step is being skipped + if self.skip: + self.log.warning("since the step is being skipped, this is not fatal") + skip_warned = True else: - self.log.log(level, f"error validating outputs: {exc}") - exc.logged = True - # raise up, unless step is being skipped - if self.skip: - self.log.warning("since the step was skipped, this is not fatal") - else: - self.log_summary(level, "failed outputs", color="WARNING") - raise + raise - if validated: self.validated_params.update(**params) - if subst is not None: - subst.current._merge_(params) - self.log_summary(logging.DEBUG, "validated outputs", ignore_missing=True) - - # bomb out if an output was invalid - invalid = [name for name in self.invalid_params + self.unresolved_params if name in self.cargo.outputs] - if invalid: - if self.skip: - self.log.warning(f"invalid outputs: {join_quote(invalid)}") - self.log.warning("since the step was skipped, this is not fatal") - else: - raise StepValidationError(f"invalid outputs: {join_quote(invalid)}", log=self.log) + + # log inputs + if validated and not self.skip: + self.log_summary(logging.INFO, "validated inputs", color="GREEN", ignore_missing=True) + if subst is not None: + subst.current = params + + # bomb out if some inputs failed to validate or substitutions resolve + if self.invalid_params or self.unresolved_params: + invalid = self.invalid_params + self.unresolved_params + if self.skip: + self.log.warning(f"invalid inputs: {join_quote(invalid)}") + if not skip_warned: + self.log.warning("since the step was skipped, this is not fatal") + skip_warned = True + else: + raise StepValidationError(f"invalid inputs: {join_quote(invalid)}", log=self.log) + + if not self.skip: + try: + if type(self.cargo) is Recipe: + self.cargo.backend = self.cargo.backend or self.backend + self.cargo._run(params) + elif type(self.cargo) is Cab: + self.cargo.backend = self.cargo.backend or self.backend + runners.run_cab(self.cargo, params, log=self.log, subst=subst, batch=batch) + else: + raise RuntimeError("Unknown cargo type") + except ScabhaBaseException as exc: + if not exc.logged: + self.log.error(f"error running step: {exc}") + exc.logged = True + raise + + self.log.debug(f"validating outputs") + validated = False + + try: + params = self.cargo.validate_outputs(params, loosely=self.skip, subst=subst) + validated = True + except ScabhaBaseException as exc: + level = logging.WARNING if self.skip else logging.ERROR + if not exc.logged: + if type(exc) is SubstitutionErrorList: + self.log.log(level, f"unresolved {{}}-substitution(s):") + for err in exc.errors: + self.log.log(level, f" {err}") + else: + self.log.log(level, f"error validating outputs: {exc}") + exc.logged = True + # raise up, unless step is being skipped + if self.skip: + self.log.warning("since the step was skipped, this is not fatal") + else: + self.log_summary(level, "failed outputs", color="WARNING") + raise + + if validated: + self.validated_params.update(**params) + if subst is not None: + subst.current._merge_(params) + self.log_summary(logging.DEBUG, "validated outputs", ignore_missing=True) + + # bomb out if an output was invalid + invalid = [name for name in self.invalid_params + self.unresolved_params if name in self.cargo.outputs] + if invalid: + if self.skip: + self.log.warning(f"invalid outputs: {join_quote(invalid)}") + self.log.warning("since the step was skipped, this is not fatal") + else: + raise StepValidationError(f"invalid outputs: {join_quote(invalid)}", log=self.log) return params @@ -915,7 +917,6 @@ def _run(self, params) -> Dict[str, Any]: Recipe._root_recipe_ns = recipe_ns subst._add_('root', Recipe._root_recipe_ns) - logopts = self.config.opts.log.copy() if 'log' in self.assign: logopts.update(**self.assign.log) @@ -942,8 +943,6 @@ def _run(self, params) -> Dict[str, Any]: if schema.required: raise RecipeValidationError(f"recipe '{self.name}' is missing required input '{name}'", log=self.log) - - # iterate over for-loop values (if not looping, this is set up to [None] in advance) scatter = getattr(self.for_loop, "scatter", False) @@ -958,6 +957,7 @@ def loop_worker(inst, step, label, subst, count, iter_var): # update variables inst.assign[inst.for_loop.var] = iter_var inst.assign[f"{inst.for_loop.var}@index"] = count + stimelogging.declare_subtask_attributes(**{inst.for_loop.var: f"{count+1}/{len(inst._for_loop_values)}"}) inst.update_assignments(inst.assign, inst.assign_based_on, inst.fqname) subst.recipe._merge_(inst.assign) # update logfile name (since this may depend on substitutions) diff --git a/stimela/stimelogging.py b/stimela/stimelogging.py index 9e2df123..32bda775 100644 --- a/stimela/stimelogging.py +++ b/stimela/stimelogging.py @@ -1,8 +1,14 @@ +import atexit import sys, os.path, re import logging -from typing import Optional, Dict, Any, Union +import contextlib +from typing import Optional, Union from omegaconf import DictConfig from scabha.substitutions import SubstitutionNS, forgiving_substitutions_from +import asyncio +import psutil +import rich.progress +import rich.logging class MultiplexingHandler(logging.Handler): """handler to send INFO and below to stdout, everything above to stderr""" @@ -80,6 +86,7 @@ def format(self, record): class SelectiveFormatter(logging.Formatter): """Selective formatter. if condition(record) is True, invokes other formatter""" def __init__(self, default_formatter, dispatch_list): + logging.Formatter.__init__(self) self._dispatch_list = dispatch_list self._default_formatter = default_formatter @@ -93,7 +100,7 @@ def format(self, record): _logger = None log_console_handler = log_formatter = log_boring_formatter = log_colourful_formatter = None - +progress_bar = progress_task = None def is_logger_initialized(): return _logger is not None @@ -132,10 +139,28 @@ def _is_from_subprocess(rec): log_formatter = log_boring_formatter if boring else log_colourful_formatter if console: + global progress_bar, progress_task + progress_bar = rich.progress.Progress( + rich.progress.SpinnerColumn(), + "{task.description}", + rich.progress.SpinnerColumn(), + "{task.fields[command]}", + rich.progress.TimeElapsedColumn(), + "{task.fields[cpu_info]}", + refresh_per_second=2, + transient=True) + + progress_task = progress_bar.add_task("stimela", command=" ", cpu_info=" ", start=True) + progress_bar.__enter__() + atexit.register(lambda:progress_bar.__exit__(None, None, None)) + if "SILENT_STDERR" in os.environ and os.environ["SILENT_STDERR"].upper()=="ON": log_console_handler = logging.StreamHandler(stream=sys.stdout) else: - log_console_handler = MultiplexingHandler() + log_console_handler = rich.logging.RichHandler(console=progress_bar, + highlighter=rich.highlighter.NullHighlighter(), + show_level=False, show_path=False, show_time=False) + log_console_handler.setFormatter(log_formatter) log_console_handler.setLevel(loglevel) _logger.addHandler(log_console_handler) @@ -146,6 +171,56 @@ def _is_from_subprocess(rec): return _logger +progress_task_names = [] +progress_task_names_orig = [] + +@contextlib.contextmanager +def declare_subtask(subtask_name): + progress_task_names.append(subtask_name) + progress_task_names_orig.append(subtask_name) + if progress_bar is not None: + progress_bar.update(progress_task, description='.'.join(progress_task_names)) + try: + yield subtask_name + finally: + progress_task_names.pop(-1) + progress_task_names_orig.pop(-1) + if progress_bar is not None: + progress_bar.update(progress_task, description='.'.join(progress_task_names)) + +def declare_subtask_attributes(**kw): + attrs = ','.join([f"{key} {value}" for key, value in kw.items()]) + progress_task_names[-1] = f"{progress_task_names_orig[-1]}\[{attrs}]" + + if progress_bar is not None: + progress_bar.update(progress_task, description='.'.join(progress_task_names)) + + +@contextlib.contextmanager +def declare_subcommand(command): + progress_bar and progress_bar.update(progress_task, command=command) + progress_bar and progress_bar.reset(progress_task) + try: + yield command + finally: + progress_bar and progress_bar.update(progress_task, command="") + + +async def run_process_status_update(): + if progress_bar: + with contextlib.suppress(asyncio.CancelledError): + while True: + cpu = psutil.cpu_percent() + mem = psutil.virtual_memory() + used = round(mem.total*mem.percent/100 / 2**30) + total = round(mem.total / 2**30) + progress_bar.update(progress_task, cpu_info=f"CPU [green]{cpu}%[/green] RAM [green]{used}[/green]/[green]{total}[/green]G") + await asyncio.sleep(1) + + + + + _logger_file_handlers = {} _logger_console_handlers = {} diff --git a/stimela/tests/test_xrun.yml b/stimela/tests/test_xrun.yml index 0edf23d3..7871c85e 100644 --- a/stimela/tests/test_xrun.yml +++ b/stimela/tests/test_xrun.yml @@ -53,6 +53,10 @@ recipe: name: "demo recipe" info: 'top level recipe definition' + for_loop: + var: x + over: [1,2,3] + steps: echo: cab: shell @@ -66,9 +70,9 @@ recipe: cab: sleep params: seconds: 5 - cp: - cab: fileops - params: - command: cp - src: a - dest: a + # cp: + # cab: fileops + # params: + # command: cp + # src: a + # dest: a diff --git a/stimela/utils/xrun_asyncio.py b/stimela/utils/xrun_asyncio.py index 4c21aa78..7b35b791 100644 --- a/stimela/utils/xrun_asyncio.py +++ b/stimela/utils/xrun_asyncio.py @@ -1,4 +1,4 @@ -import traceback, subprocess, errno, re, time, logging, os, sys, signal +import traceback, subprocess, errno, re, time, logging, os, sys, signal, contextlib, datetime import asyncio import psutil import rich @@ -8,6 +8,8 @@ from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn from rich.logging import RichHandler +from stimela import stimelogging + from .xrun_poll import get_stimela_logger, dispatch_to_log, xrun_nolog from . import StimelaCabRuntimeError, StimelaProcessRuntimeError @@ -43,31 +45,13 @@ def xrun(command, options, log=None, env=None, timeout=-1, kill_callback=None, o log.info("running " + command_line, extra=dict(stimela_subprocess_output=(command_name, "start"))) - start_time = time.time() - - def render_process_status(): - cpu = psutil.cpu_percent() - mem = psutil.virtual_memory() - used = round(mem.total*mem.percent/100 / 2**30) - total = round(mem.total / 2**30) - return f"CPU [green]{cpu}%[/green] RAM [green]{used}[/green]/[green]{total}[/green]G" - - with Progress( - SpinnerColumn(), - f"running {command_name}", - TimeElapsedColumn(table_column=Column(style="blue")), - "{task.description}", - refresh_per_second=2, - transient=True) as progress: - - if progress_bar: - progress_task = progress.add_task(render_process_status()) - log_handler = RichHandler(console=progress, - highlighter=rich.highlighter.NullHighlighter(), - show_level=False, show_path=False, show_time=False) - else: - progress_task = log_handler = None + with stimelogging.declare_subcommand(os.path.basename(command_name)): + start_time = datetime.datetime.now() + def elapsed(): + """Returns string representing elapsed time""" + return str(datetime.datetime.now() - start_time).split('.', 1)[0] + loop = asyncio.get_event_loop() proc = loop.run_until_complete( @@ -80,48 +64,36 @@ async def stream_reader(stream, stream_name): line = await stream.readline() line = (line.decode('utf-8') if type(line) is bytes else line).rstrip() if line or not stream.at_eof(): - dispatch_to_log(log, line, command_name, stream_name, output_wrangler=output_wrangler, custom_console_handler=log_handler) - - async def cpu_reporter_impl(period): - while True: - await asyncio.sleep(period) - progress_bar and progress.update(progress_task, description=render_process_status()) - # progress.print(f"Current CPU usage is {usage}%") - - async def wrap_cancellable(job): - try: - return await job - except asyncio.CancelledError as exc: - return None + dispatch_to_log(log, line, command_name, stream_name, output_wrangler=output_wrangler) async def proc_awaiter(proc, *cancellables): await proc.wait() for task in cancellables: task.cancel() - reporter = asyncio.Task(cpu_reporter_impl(1)) + reporter = asyncio.Task(stimelogging.run_process_status_update()) try: job = asyncio.gather( proc_awaiter(proc, reporter), stream_reader(proc.stdout, "stdout"), stream_reader(proc.stderr, "stderr"), - wrap_cancellable(reporter) + reporter ) results = loop.run_until_complete(job) status = proc.returncode + log.info(f"{command_name} exited with code {status} after {elapsed()}") except SystemExit as exc: loop.run_until_complete(proc.wait()) except KeyboardInterrupt: - progress.stop() if callable(kill_callback): - log.warning(f"Ctrl+C caught: shutting down {command_name} process, please give it a few moments") + log.warning(f"Ctrl+C caught after {elapsed()}, shutting down {command_name} process, please give it a few moments") kill_callback() log.info(f"the {command_name} process was shut down successfully", extra=dict(stimela_subprocess_output=(command_name, "status"))) loop.run_until_complete(proc.wait()) else: - log.warning(f"Ctrl+C caught, interrupting {command_name} process {proc.pid}") + log.warning(f"Ctrl+C caught after {elapsed()}, interrupting {command_name} process {proc.pid}") proc.send_signal(signal.SIGINT) async def wait_on_process(proc): @@ -144,13 +116,12 @@ async def wait_on_process(proc): raise StimelaCabRuntimeError(f"{command_name} interrupted with Ctrl+C") except Exception as exc: - progress.stop() - traceback.print_exc() loop.run_until_complete(proc.wait()) - raise StimelaCabRuntimeError(f"{command_name} threw exception: {exc}'", log=log) + traceback.print_exc() + raise StimelaCabRuntimeError(f"{command_name} threw exception: {exc} after {elapsed()}'", log=log) - if status and not return_errcode: - raise StimelaCabRuntimeError(f"{command_name} returns error code {status}") + if status and not return_errcode: + raise StimelaCabRuntimeError(f"{command_name} returns error code {status} after {elapsed()}") return status From 812c66caeaf9f6f0e46731adf9526356b5fd16ad Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 23 Mar 2022 11:11:51 +0200 Subject: [PATCH 24/37] added elapsed time to status bar --- stimela/commands/run.py | 57 +++++++++++++++++++++++++++++++---------- stimela/stimelogging.py | 44 +++++++++++++++++++------------ 2 files changed, 72 insertions(+), 29 deletions(-) diff --git a/stimela/commands/run.py b/stimela/commands/run.py index 03377eee..b22372c6 100644 --- a/stimela/commands/run.py +++ b/stimela/commands/run.py @@ -1,6 +1,7 @@ from collections import OrderedDict import dataclasses import itertools +from datetime import datetime from yaml.error import YAMLError from scabha import configuratt @@ -18,16 +19,42 @@ from stimela.kitchen.recipe import Recipe, Step, join_quote from stimela.config import get_config_class -# def print_schema_help(cargo_type, name, cargo): -# print( -# f""" -# {cargo_type} name: {name} -# Info: {cargo.info} -# Inputs: -# """) -# for name, schema in cargo.inputs: - - +def print_schema_help(cargo_type, name, cargo, extra_defaults={}, omit_params={}): + print( +f""" + {cargo_type} name: {name} + Info: {cargo.info} +""") + def print_inputs_outputs(inputs_outputs): + for name, schema in inputs_outputs.items(): + line = f" {name}: {schema.dtype}" + default = extra_defaults.get(name) + if default is None: + default = schema.default + if default is not None: + line += f" = {default}" + line += "\n" + if schema.info: + line = f" {schema.info}\n" + print(line) + if cargo.inputs: + print(" Inputs:\n") + print_inputs_outputs(cargo.inputs) + else: + print(" Inputs: none\n") + if cargo.outputs: + print(" Outputs:\n") + print_inputs_outputs(cargo.outputs) + else: + print(" Outputs: none\n") + if type(cargo) is Recipe: + print("\n Steps") + if any(step.skip for step in cargo.steps.values()): + print(" including [skipped] steps") + step_names = [] + for label, step in cargo.steps.items(): + step_names.append(f"[{label}]" if step.skip else label) + print(f":\n {' '.join(step_names)}") @cli.command("run", @@ -296,19 +323,23 @@ def run(what: str, parameters: List[str] = [], dry_run: bool = False, help: bool log.info("dry run was requested, exiting") sys.exit(0) + start_time = datetime.now() + def elapsed(): + return str(datetime.now() - start_time).split('.', 1)[0] + try: outputs = outer_step.run() except ScabhaBaseException as exc: if not exc.logged: - outer_step.log.error(f"run failed with exception: {exc}") + outer_step.log.error(f"run failed after {elapsed()} with exception: {exc}") sys.exit(1) if outputs and step.log.isEnabledFor(logging.DEBUG): - outer_step.log.debug("run successful, outputs follow:") + outer_step.log.debug(f"run successful after {elapsed()}, outputs follow:") for name, value in outputs.items(): if name in recipe.outputs: outer_step.log.debug(f" {name}: {value}") else: - outer_step.log.info("run successful") + outer_step.log.info(f"run successful after {elapsed()}") return 0 \ No newline at end of file diff --git a/stimela/stimelogging.py b/stimela/stimelogging.py index 32bda775..7805737c 100644 --- a/stimela/stimelogging.py +++ b/stimela/stimelogging.py @@ -1,5 +1,6 @@ import atexit import sys, os.path, re +from datetime import datetime import logging import contextlib from typing import Optional, Union @@ -101,6 +102,7 @@ def format(self, record): _logger = None log_console_handler = log_formatter = log_boring_formatter = log_colourful_formatter = None progress_bar = progress_task = None +_start_time = datetime.now() def is_logger_initialized(): return _logger is not None @@ -142,7 +144,8 @@ def _is_from_subprocess(rec): global progress_bar, progress_task progress_bar = rich.progress.Progress( rich.progress.SpinnerColumn(), - "{task.description}", + "[yellow]{task.fields[elapsed_time]}[/yellow]", + "[bold]{task.description}[/bold]", rich.progress.SpinnerColumn(), "{task.fields[command]}", rich.progress.TimeElapsedColumn(), @@ -150,7 +153,7 @@ def _is_from_subprocess(rec): refresh_per_second=2, transient=True) - progress_task = progress_bar.add_task("stimela", command=" ", cpu_info=" ", start=True) + progress_task = progress_bar.add_task("stimela", command="starting", cpu_info=" ", elapsed_time="", start=True) progress_bar.__enter__() atexit.register(lambda:progress_bar.__exit__(None, None, None)) @@ -178,43 +181,52 @@ def _is_from_subprocess(rec): def declare_subtask(subtask_name): progress_task_names.append(subtask_name) progress_task_names_orig.append(subtask_name) - if progress_bar is not None: - progress_bar.update(progress_task, description='.'.join(progress_task_names)) + update_process_status(description='.'.join(progress_task_names)) try: yield subtask_name finally: progress_task_names.pop(-1) progress_task_names_orig.pop(-1) - if progress_bar is not None: - progress_bar.update(progress_task, description='.'.join(progress_task_names)) + update_process_status(progress_task, description='.'.join(progress_task_names)) def declare_subtask_attributes(**kw): attrs = ','.join([f"{key} {value}" for key, value in kw.items()]) progress_task_names[-1] = f"{progress_task_names_orig[-1]}\[{attrs}]" - - if progress_bar is not None: - progress_bar.update(progress_task, description='.'.join(progress_task_names)) + update_process_status(description='.'.join(progress_task_names)) @contextlib.contextmanager def declare_subcommand(command): - progress_bar and progress_bar.update(progress_task, command=command) + update_process_status(command=command) progress_bar and progress_bar.reset(progress_task) try: yield command finally: - progress_bar and progress_bar.update(progress_task, command="") + update_process_status(command="") +def update_process_status(command=None, description=None): + if progress_bar is not None: + # elapsed time + elapsed = str(datetime.now() - _start_time).split('.', 1)[0] + # CPU and memory + cpu = psutil.cpu_percent() + mem = psutil.virtual_memory() + used = round(mem.total*mem.percent/100 / 2**30) + total = round(mem.total / 2**30) + updates = dict(elapsed_time=elapsed, + cpu_info=f"CPU [green]{cpu}%[/green] RAM [green]{used}[/green]/[green]{total}[/green]G") + if command is not None: + updates['command'] = command + if description is not None: + updates['description'] = description + progress_bar.update(progress_task, **updates) + async def run_process_status_update(): if progress_bar: with contextlib.suppress(asyncio.CancelledError): while True: - cpu = psutil.cpu_percent() - mem = psutil.virtual_memory() - used = round(mem.total*mem.percent/100 / 2**30) - total = round(mem.total / 2**30) - progress_bar.update(progress_task, cpu_info=f"CPU [green]{cpu}%[/green] RAM [green]{used}[/green]/[green]{total}[/green]G") + update_process_status() await asyncio.sleep(1) From e7adf8e2aa0732566bd5bf1e96d3dd2453eeb26a Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 23 Mar 2022 16:03:48 +0200 Subject: [PATCH 25/37] added run.node config entry, and allowed config entries as keys in assign_based_on --- stimela/config.py | 4 ++-- stimela/kitchen/recipe.py | 29 ++++++++++++++++++----------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/stimela/config.py b/stimela/config.py index 113f0d75..12652b88 100644 --- a/stimela/config.py +++ b/stimela/config.py @@ -1,5 +1,5 @@ import glob -import os, os.path, time, re, logging +import os, os.path, time, re, logging, platform from typing import Any, List, Dict, Optional, Union from enum import Enum from dataclasses import dataclass, field @@ -206,7 +206,7 @@ def _load(conf, config_file): # add runtime info _ds = time.strftime("%Y%m%d") _ts = time.strftime("%H%M%S") - runtime = dict(date=_ds, time=_ts, datetime=f"{_ds}-{_ts}") + runtime = dict(date=_ds, time=_ts, datetime=f"{_ds}-{_ts}", node=platform.node()) conf.run = OmegaConf.create(runtime) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 0b722700..9aebfb92 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -405,6 +405,18 @@ def validate_assignments(self, assign, assign_based_on, location): # raise RecipeValidationError(f"'{location}.{assign_label}.{key}' clashes with an {io_label}") def update_assignments(self, assign, assign_based_on, params, location=""): + + def resolve_config_variable(key): + path = key.split('.') + varname = path[-1] + section = self.config + for element in path[:-1]: + if element in section: + section = section[element] + else: + raise AssignmentError("{location}.assign_based_on.{basevar}: '{element}' in '{key}' is not a valid config section") + return section, varname + for basevar, value_list in assign_based_on.items(): # make sure the base variable is defined if basevar in assign: @@ -413,8 +425,11 @@ def update_assignments(self, assign, assign_based_on, params, location=""): value = str(params[basevar]) elif basevar in self.inputs_outputs and self.inputs_outputs[basevar].default is not None: value = str(self.inputs_outputs[basevar].default) - else: - raise AssignmentError(f"{location}.assign_based_on.{basevar} is an unset variable or parameter") + elif '.' in basevar: + section, varname = resolve_config_variable(basevar) + if varname not in section: + raise AssignmentError(f"{location}.assign_based_on.{basevar}: is not a valid config entry") + value = str(section[varname]) # look up list of assignments assignments = value_list.get(value, value_list.get('DEFAULT')) if assignments is None: @@ -426,15 +441,7 @@ def update_assignments(self, assign, assign_based_on, params, location=""): else: # vars with dots are config settings if '.' in key: - self.log.debug(f"config assignment: {key}={value}") - path = key.split('.') - varname = path[-1] - section = self.config - for element in path[:-1]: - if element in section: - section = section[element] - else: - raise AssignmentError("{location}.assign_based_on.{basevar}: '{element}' in '{key}' is not a valid config section") + section, varname = resolve_config_variable(key) section[varname] = value # vars without dots are local variables or parameters else: From 959e973486a90e31c66a17a7651ffe174c18dce7 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 23 Mar 2022 22:40:41 +0200 Subject: [PATCH 26/37] added stinmela help facility --- stimela/commands/help.py | 138 ++++++++++++++++++++++++++++++++++++ stimela/commands/run.py | 2 - stimela/kitchen/recipe.py | 43 ++++++++--- stimela/main.py | 2 +- stimela/tests/test_xrun.yml | 4 +- 5 files changed, 174 insertions(+), 15 deletions(-) create mode 100644 stimela/commands/help.py diff --git a/stimela/commands/help.py b/stimela/commands/help.py new file mode 100644 index 00000000..52267297 --- /dev/null +++ b/stimela/commands/help.py @@ -0,0 +1,138 @@ +import fnmatch +import os, sys +import click +from omegaconf import OmegaConf + +import stimela +from scabha import configuratt +from stimela import logger +from stimela.main import cli +from scabha.cargo import Cab +from stimela.kitchen.recipe import Recipe, Step, join_quote +from stimela.config import ConfigExceptionTypes +from typing import * +from rich.tree import Tree +from rich.table import Table +from rich import box +from rich import print as rich_print + +def print_schema_help(cargo_type, name, cargo, extra_defaults={}, omit_params={}): + print( +f""" + {cargo_type} name: {name} + Info: {cargo.info} +""") + def print_inputs_outputs(inputs_outputs): + for name, schema in inputs_outputs.items(): + line = f" {name}: {schema.dtype}" + default = extra_defaults.get(name) + if default is None: + default = schema.default + if default is not None: + line += f" = {default}" + line += "\n" + if schema.info: + line = f" {schema.info}\n" + print(line) + if cargo.inputs: + print(" Inputs:\n") + print_inputs_outputs(cargo.inputs) + else: + print(" Inputs: none\n") + if cargo.outputs: + print(" Outputs:\n") + print_inputs_outputs(cargo.outputs) + else: + print(" Outputs: none\n") + if type(cargo) is Recipe: + print("\n Steps") + if any(step.skip for step in cargo.steps.values()): + print(" including [skipped] steps") + step_names = [] + for label, step in cargo.steps.items(): + step_names.append(f"[{label}]" if step.skip else label) + print(f":\n {' '.join(step_names)}") + + +@cli.command("help", + help=""" + Print help on a cab or a recipe. + """) +@click.option("do_list", "-l", "--list", is_flag=True, + help="""Lists the available cabs and recipes, including custom-defined ones.""") +@click.argument("items", nargs=-1, metavar="filename.yml|cab name|recipe name|...") +def help(items: List[str] = [], do_list=False): + + log = logger() + top_tree = Tree(f"stimela help {' '.join(items)}", guide_style="dim") + found_something = False + + for item in items: + # a filename -- treat it as a config + if os.path.isfile(item): + log.info(f"loading recipe/config {item}") + + # if file contains a recipe entry, treat it as a full config (that can include cabs etc.) + try: + conf = configuratt.load(item, use_sources=[stimela.CONFIG]) + except ConfigExceptionTypes as exc: + log.error(f"error loading {item}: {exc}") + sys.exit(2) + + # anything that is not a standard config section will be treated as a recipe + recipes = [name for name in conf if name not in stimela.CONFIG] + + for name in recipes: + # cast section to Recipe and remove from loaded conf + recipe = OmegaConf.create(Recipe) + recipe = OmegaConf.unsafe_merge(recipe, conf[name]) + del conf[name] + # add to global namespace + stimela.CONFIG.lib.recipes[name] = recipe + + # the rest is safe to merge into config as is + stimela.CONFIG = OmegaConf.merge(stimela.CONFIG, conf) + + # else treat as a wildcard for recipe names or cab names + else: + recipe_names = fnmatch.filter(stimela.CONFIG.lib.recipes.keys(), item) + cab_names = fnmatch.filter(stimela.CONFIG.cabs.keys(), item) + if not recipe_names and not cab_names: + log.error(f"'{item}' does not match any files, recipes or cab names") + sys.exit(2) + + for name in recipe_names: + recipe = Recipe(**stimela.CONFIG.lib.recipes[name]) + recipe.finalize() + tree = top_tree.add(f"Recipe: [bold]{name}[/bold]") + recipe.rich_help(tree) + + for name in cab_names: + cab = Cab(**stimela.CONFIG.cabs[name]) + cab.finalize() + tree = top_tree.add(f"Cab: [bold]{name}[/bold]") + cab.rich_help(tree) + + found_something = True + + if do_list or not found_something: + if stimela.CONFIG.lib.recipes: + subtree = top_tree.add("Recipes:") + table = Table.grid("", "", padding=(0,2)) + for name, recipe in stimela.CONFIG.lib.recipes.items(): + table.add_row(f"[bold]{name}[/bold]", recipe.info) + subtree.add(table) + elif not do_list and not found_something: + log.error(f"nothing particular to help on, perhaps specify a recipe name or a cab name, or use -l/--list") + sys.exit(2) + + if do_list: + subtree = top_tree.add("Cabs:") + table = Table.grid("", "", padding=(0,2)) + for name, cab in stimela.CONFIG.cabs.items(): + table.add_row(f"[bold]{name}[/bold]", cab.info) + subtree.add(table) + + rich_print(top_tree) + + diff --git a/stimela/commands/run.py b/stimela/commands/run.py index b22372c6..7c73e0b5 100644 --- a/stimela/commands/run.py +++ b/stimela/commands/run.py @@ -79,8 +79,6 @@ def print_inputs_outputs(inputs_outputs): help="""Sets skip=false on the given step(s). Use commas, or give multiple times for multiple steps.""") @click.option("-d", "--dry-run", is_flag=True, help="""Doesn't actually run anything, only prints the selected steps.""") -@click.option("-h", "--help", is_flag=True, - help="""Prints help on the selected runnable and its parameters, then exits.""") @click.argument("what", metavar="filename.yml|CAB") @click.argument("parameters", nargs=-1, metavar="[recipe name] [PARAM=VALUE] [X.Y.Z=FOO] ...", required=False) def run(what: str, parameters: List[str] = [], dry_run: bool = False, help: bool = False, diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 0b722700..fc4c286e 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -512,7 +512,7 @@ class AliasInfo(object): from_recipe: bool = False # if True, value propagates from recipe up to step from_step: bool = False # if True, value propagates from step down to recipe - def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): + def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple], category: Optional[int] = None): wildcards = False if type(alias_target) is str: step_spec, step_param_name = alias_target.split('.', 1) @@ -544,7 +544,7 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): continue else: raise RecipeValidationError(f"alias '{alias_name}' refers to unknown step parameter '{step_label}.{step_param_name}'", log=self.log) - # implicit inuts cannot be aliased + # implicit inputs cannot be aliased if input_schema and input_schema.implicit: raise RecipeValidationError(f"alias '{alias_name}' refers to implicit input '{step_label}.{step_param_name}'", log=self.log) # if alias is already defined, check for conflicts @@ -562,14 +562,22 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple]): # else alias not yet defined, insert a schema else: io = self.inputs if input_schema else self.outputs - # having an own default in the schema overrides the step's default - own_default = None - if alias_name in io and io[alias_name].default is not None: - own_default = io[alias_name].default + # if we have a schema defined for the alias, some params must be inherited from it + own_schema = io.get(alias_name) + # define schema based on copy of the target io[alias_name] = copy.copy(schema) - alias_schema = io[alias_name] # make copy of schema object - if own_default is not None: - alias_schema.default = own_default + alias_schema = io[alias_name] + # default set from own schema + if own_schema is not None and own_schema.default is not None: + alias_schema.default = own_schema.default + # required flag overrides, if set from our own schema + if own_schema is not None and own_schema.required: + alias_schema.required = True + # category is set by argument, else from own schema, else from target + if category is not None: + alias_schema.category = category + elif own_schema is not None and own_schema.category is not None: + alias_schema.category = own_schema.category # if step parameter is implicit, mark the alias as implicit. Note that this only applies to outputs if schema.implicit: @@ -652,7 +660,7 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): auto_name = f"{label}_{name}" if auto_name in self.inputs or auto_name in self.outputs: raise RecipeValidationError(f"auto-generated parameter name '{auto_name}' conflicts with another name. Please define an explicit alias for this.", log=log) - self._add_alias(auto_name, (step, label, name)) + self._add_alias(auto_name, (step, label, name), category=3) # these will be re-merged when needed again self._inputs_outputs = None @@ -881,6 +889,21 @@ def summary(self, params: Dict[str, Any], recursive=True, ignore_missing=False): _root_recipe_ns = None + def rich_help(self, tree): + Cargo.rich_help(self, tree) + if self.for_loop: + loop_tree = tree.add("For loop:") + loop_tree.add(f"iterating [bold]{self.for_loop.var}[/bold] over {len(self._for_loop_values)} values") + steps_tree = tree.add("Steps (including [italic]skipped[/italic] steps):") + steps = [] + for label, step in self.steps.items(): + if step.skip: + steps.append(f"[italic]{label}[/italic]") + else: + steps.append(label) + steps_tree.add(", ".join(steps)) + + def _run(self, params) -> Dict[str, Any]: """Internal recipe run method. Meant to be called from a wrapper Step object (which validates the parameters, etc.) diff --git a/stimela/main.py b/stimela/main.py index bb60612b..b2354d2e 100644 --- a/stimela/main.py +++ b/stimela/main.py @@ -93,7 +93,7 @@ def cli(backend, config_files=[], verbose=False): # import commands -from stimela.commands import run, images, build, push, save_config +from stimela.commands import run, images, build, push, save_config, help ## the ones not listed above haven't been converted to click yet. They are: # cabs, clean, containers, kill, ps, pull diff --git a/stimela/tests/test_xrun.yml b/stimela/tests/test_xrun.yml index 7871c85e..9f660265 100644 --- a/stimela/tests/test_xrun.yml +++ b/stimela/tests/test_xrun.yml @@ -49,9 +49,9 @@ opts: symlink: logs -recipe: +xrun_recipe: name: "demo recipe" - info: 'top level recipe definition' + info: 'demo recipe with loops and sleeps' for_loop: var: x From 2d6524078d3ea40d9d1a114a846322b989a8581b Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Thu, 24 Mar 2022 16:37:39 +0200 Subject: [PATCH 27/37] prettified rich_help scheme --- stimela/commands/help.py | 70 +++++++++++------------------- stimela/config.py | 15 ++++--- stimela/kitchen/recipe.py | 40 ++++++++++------- stimela/tests/test_loop_recipe.yml | 2 - 4 files changed, 59 insertions(+), 68 deletions(-) diff --git a/stimela/commands/help.py b/stimela/commands/help.py index 52267297..bc3344fa 100644 --- a/stimela/commands/help.py +++ b/stimela/commands/help.py @@ -7,7 +7,7 @@ from scabha import configuratt from stimela import logger from stimela.main import cli -from scabha.cargo import Cab +from scabha.cargo import Cab, ParameterCategory from stimela.kitchen.recipe import Recipe, Step, join_quote from stimela.config import ConfigExceptionTypes from typing import * @@ -16,57 +16,39 @@ from rich import box from rich import print as rich_print -def print_schema_help(cargo_type, name, cargo, extra_defaults={}, omit_params={}): - print( -f""" - {cargo_type} name: {name} - Info: {cargo.info} -""") - def print_inputs_outputs(inputs_outputs): - for name, schema in inputs_outputs.items(): - line = f" {name}: {schema.dtype}" - default = extra_defaults.get(name) - if default is None: - default = schema.default - if default is not None: - line += f" = {default}" - line += "\n" - if schema.info: - line = f" {schema.info}\n" - print(line) - if cargo.inputs: - print(" Inputs:\n") - print_inputs_outputs(cargo.inputs) - else: - print(" Inputs: none\n") - if cargo.outputs: - print(" Outputs:\n") - print_inputs_outputs(cargo.outputs) - else: - print(" Outputs: none\n") - if type(cargo) is Recipe: - print("\n Steps") - if any(step.skip for step in cargo.steps.values()): - print(" including [skipped] steps") - step_names = [] - for label, step in cargo.steps.items(): - step_names.append(f"[{label}]" if step.skip else label) - print(f":\n {' '.join(step_names)}") - - @cli.command("help", help=""" Print help on a cab or a recipe. """) @click.option("do_list", "-l", "--list", is_flag=True, help="""Lists the available cabs and recipes, including custom-defined ones.""") +@click.option("-I", "--implicit", is_flag=True, + help="""Increases level of detail to include implicit inputs/outputs.""") +@click.option("-O", "--obscure", is_flag=True, + help="""Increases level of detail to include implicit and obscure inputs/outputs.""") +@click.option("-A", "--all", is_flag=True, + help="""Increases level of detail to include all inputs/outputs.""") +@click.option("-R", "--required", is_flag=True, + help="""Decreases level of detail to include required inputs/outputs only.""") @click.argument("items", nargs=-1, metavar="filename.yml|cab name|recipe name|...") -def help(items: List[str] = [], do_list=False): +def help(items: List[str] = [], do_list=False, implicit=False, obscure=False, all=False, required=False): log = logger() top_tree = Tree(f"stimela help {' '.join(items)}", guide_style="dim") found_something = False + if required: + max_category = ParameterCategory.Required + else: + max_category = ParameterCategory.Optional + if all: + max_category = ParameterCategory.Hidden + elif obscure: + max_category = ParameterCategory.Obscure + elif implicit: + max_category = ParameterCategory.Implicit + + for item in items: # a filename -- treat it as a config if os.path.isfile(item): @@ -98,20 +80,20 @@ def help(items: List[str] = [], do_list=False): recipe_names = fnmatch.filter(stimela.CONFIG.lib.recipes.keys(), item) cab_names = fnmatch.filter(stimela.CONFIG.cabs.keys(), item) if not recipe_names and not cab_names: - log.error(f"'{item}' does not match any files, recipes or cab names") + log.error(f"'{item}' does not match any files, recipes or cab names. Try -l/--list") sys.exit(2) for name in recipe_names: recipe = Recipe(**stimela.CONFIG.lib.recipes[name]) - recipe.finalize() + recipe.finalize(fqname=name) tree = top_tree.add(f"Recipe: [bold]{name}[/bold]") - recipe.rich_help(tree) + recipe.rich_help(tree, max_category=max_category) for name in cab_names: cab = Cab(**stimela.CONFIG.cabs[name]) cab.finalize() tree = top_tree.add(f"Cab: [bold]{name}[/bold]") - cab.rich_help(tree) + cab.rich_help(tree, max_category=max_category) found_something = True diff --git a/stimela/config.py b/stimela/config.py index 12652b88..38a577b1 100644 --- a/stimela/config.py +++ b/stimela/config.py @@ -77,11 +77,6 @@ class StimelaOptions(object): ## For distributed computes and cpu allocation dist: Dict[str, Any] = EmptyDictDefault() -@dataclass -class StimelaLibrary(object): - params: Dict[str, Any] = EmptyDictDefault() - recipes: Dict[str, Any] = EmptyDictDefault() - steps: Dict[str, Any] = EmptyDictDefault() def DefaultDirs(): return field(default_factory=lambda:dict(indir='.', outdir='.')) @@ -106,7 +101,6 @@ def DefaultDirs(): # set to the config file that was actually found CONFIG_LOADED = None - def merge_extra_config(conf, newconf): from stimela import logger @@ -130,7 +124,14 @@ def load_config(extra_configs=List[str]): stimela_dir = os.path.dirname(stimela.__file__) from stimela.kitchen.recipe import Recipe, Cab - global StimelaConfig + global StimelaConfig, StimelaLibrary + @dataclass + class StimelaLibrary(object): + params: Dict[str, Parameter] = EmptyDictDefault() + recipes: Dict[str, Recipe] = EmptyDictDefault() + steps: Dict[str, Any] = EmptyDictDefault() + misc: Dict[str, Any] = EmptyDictDefault() + @dataclass class StimelaConfig: base: Dict[str, StimelaImage] = EmptyDictDefault() diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index a0439181..3bd8ff35 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -5,7 +5,8 @@ from omegaconf import MISSING, OmegaConf, DictConfig, ListConfig from collections import OrderedDict from collections.abc import Mapping -from pytest import param +import rich.table + from scabha import cargo from pathos.pools import ProcessPool from pathos.serial import SerialPool @@ -19,10 +20,9 @@ from scabha.exceptions import SubstitutionError, SubstitutionErrorList from scabha.validate import Unresolved, join_quote from scabha.substitutions import SubstitutionNS, substitutions_from -from scabha.cargo import Parameter, Cargo, Cab, Batch +from scabha.cargo import Parameter, Cargo, Cab, Batch, ParameterCategory from scabha.types import File, Directory, MS - from . import runners Conditional = Optional[str] @@ -577,6 +577,8 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple], category: # default set from own schema if own_schema is not None and own_schema.default is not None: alias_schema.default = own_schema.default + if own_schema and own_schema.info is not None: + alias_schema.info = own_schema.info # required flag overrides, if set from our own schema if own_schema is not None and own_schema.required: alias_schema.required = True @@ -596,7 +598,7 @@ def _add_alias(self, alias_name: str, alias_target: Union[str, Tuple], category: schema.default is not None or schema.implicit is not None # if the step parameter is set and ours isn't, mark our schema as having a default - if have_step_param and alias_schema.default is not None: + if have_step_param and alias_schema.default is None: alias_schema.default = DeferredAlias(f"{step_label}.{step_param_name}") # alias becomes required if any step parameter it refers to was required, but wasn't already set @@ -667,7 +669,7 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): auto_name = f"{label}_{name}" if auto_name in self.inputs or auto_name in self.outputs: raise RecipeValidationError(f"auto-generated parameter name '{auto_name}' conflicts with another name. Please define an explicit alias for this.", log=log) - self._add_alias(auto_name, (step, label, name), category=3) + self._add_alias(auto_name, (step, label, name), category=ParameterCategory.Obscure) # these will be re-merged when needed again self._inputs_outputs = None @@ -896,19 +898,27 @@ def summary(self, params: Dict[str, Any], recursive=True, ignore_missing=False): _root_recipe_ns = None - def rich_help(self, tree): - Cargo.rich_help(self, tree) + def rich_help(self, tree, max_category=ParameterCategory.Optional): + Cargo.rich_help(self, tree, max_category=max_category) if self.for_loop: loop_tree = tree.add("For loop:") - loop_tree.add(f"iterating [bold]{self.for_loop.var}[/bold] over {len(self._for_loop_values)} values") - steps_tree = tree.add("Steps (including [italic]skipped[/italic] steps):") - steps = [] - for label, step in self.steps.items(): - if step.skip: - steps.append(f"[italic]{label}[/italic]") + if self._for_loop_values is not None: + over = f"{len(self._for_loop_values)} values" else: - steps.append(label) - steps_tree.add(", ".join(steps)) + over = f"[bold]{self.for_loop.over}[/bold]" + loop_tree.add(f"iterating [bold]{self.for_loop.var}[/bold] over {over}") + if self.steps: + have_skips = any(step.skip for step in self.steps.values()) + steps_tree = tree.add(f"Steps ([italic]italicized[/italic] steps are skipped by default):" + if have_skips else "Steps:") + steps = [] + table = rich.table.Table.grid("", "", "", padding=(0,2)) # , show_header=False, show_lines=False, box=rich.box.SIMPLE) + steps_tree.add(table) + for label, step in self.steps.items(): + style = "italic" if step.skip else "bold" + table.add_row(f"[{style}]{label}[/{style}]", step.info) + else: + steps_tree = tree.add("No recipe steps defined") def _run(self, params) -> Dict[str, Any]: diff --git a/stimela/tests/test_loop_recipe.yml b/stimela/tests/test_loop_recipe.yml index 081928af..4c048344 100644 --- a/stimela/tests/test_loop_recipe.yml +++ b/stimela/tests/test_loop_recipe.yml @@ -23,8 +23,6 @@ lib: cubical_image: name: "cubical_image" info: 'does one step of cubical, followed by one step of imaging' - dirs: - log: logs aliases: ms: [calibrate.ms, image.ms] steps: From 07ea6c38ff32e34f191f60701b64d6c2eaaf7b41 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Thu, 24 Mar 2022 16:47:55 +0200 Subject: [PATCH 28/37] for loop status tweak --- stimela/kitchen/recipe.py | 2 +- stimela/stimelogging.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 3bd8ff35..0917aa19 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -997,7 +997,7 @@ def loop_worker(inst, step, label, subst, count, iter_var): # update variables inst.assign[inst.for_loop.var] = iter_var inst.assign[f"{inst.for_loop.var}@index"] = count - stimelogging.declare_subtask_attributes(**{inst.for_loop.var: f"{count+1}/{len(inst._for_loop_values)}"}) + stimelogging.declare_subtask_attributes(f"{count+1}/{len(inst._for_loop_values)}") inst.update_assignments(inst.assign, inst.assign_based_on, inst.fqname) subst.recipe._merge_(inst.assign) # update logfile name (since this may depend on substitutions) diff --git a/stimela/stimelogging.py b/stimela/stimelogging.py index 7805737c..c2e72bf2 100644 --- a/stimela/stimelogging.py +++ b/stimela/stimelogging.py @@ -189,8 +189,9 @@ def declare_subtask(subtask_name): progress_task_names_orig.pop(-1) update_process_status(progress_task, description='.'.join(progress_task_names)) -def declare_subtask_attributes(**kw): - attrs = ','.join([f"{key} {value}" for key, value in kw.items()]) +def declare_subtask_attributes(*args, **kw): + attrs = [str(x) for x in args] + [f"{key} {value}" for key, value in kw.items()] + attrs = ', '.join(attrs) progress_task_names[-1] = f"{progress_task_names_orig[-1]}\[{attrs}]" update_process_status(description='.'.join(progress_task_names)) From 0e1b6e38a1c117786797c3df3d3d916d069e26be Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Thu, 24 Mar 2022 22:14:55 +0200 Subject: [PATCH 29/37] make required auto-aliases Required category --- stimela/kitchen/recipe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 0917aa19..06a09abd 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -669,7 +669,8 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): auto_name = f"{label}_{name}" if auto_name in self.inputs or auto_name in self.outputs: raise RecipeValidationError(f"auto-generated parameter name '{auto_name}' conflicts with another name. Please define an explicit alias for this.", log=log) - self._add_alias(auto_name, (step, label, name), category=ParameterCategory.Obscure) + self._add_alias(auto_name, (step, label, name), + category=ParameterCategory.Required if schema.required else ParameterCategory.Obscure) # these will be re-merged when needed again self._inputs_outputs = None From 14c704d4f536814dee02dd80185f4870adec9de1 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Fri, 25 Mar 2022 16:51:02 +0200 Subject: [PATCH 30/37] set ignore_missing to True by default for log_summary() --- stimela/kitchen/recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 06a09abd..3fb20724 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -169,7 +169,7 @@ def prevalidate(self, subst: Optional[SubstitutionNS]=None): raise StepValidationError(f"{self.cargo.name} has the following invalid parameters: {join_quote(self.invalid_params)}") return params - def log_summary(self, level, title, color=None, ignore_missing=False): + def log_summary(self, level, title, color=None, ignore_missing=True): extra = dict(color=color, boldface=True) if self.log.isEnabledFor(level): self.log.log(level, f"### {title}", extra=extra) From 11068878c2342597a6472b5d03453c7ec1cc571b Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 30 Mar 2022 11:12:29 +0200 Subject: [PATCH 31/37] cleaned up variable assignments based on new parser, made 'skip' evaluatable --- stimela/cargo/cab/wsclean.yaml | 10 +- stimela/cargo/cab/wsclean_pol.yaml | 18 +-- stimela/commands/run.py | 39 +---- stimela/kitchen/recipe.py | 243 ++++++++++++++++++----------- stimela/tests/test_loop_recipe.yml | 1 + 5 files changed, 167 insertions(+), 144 deletions(-) diff --git a/stimela/cargo/cab/wsclean.yaml b/stimela/cargo/cab/wsclean.yaml index e0ceb667..056d5e2e 100644 --- a/stimela/cargo/cab/wsclean.yaml +++ b/stimela/cargo/cab/wsclean.yaml @@ -21,26 +21,26 @@ outputs: dirty: info: Dirty images dtype: List[File] - implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-dirty.fits" + implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-dirty.fits)" must_exist: false restored: info: Restored images dtype: List[File] - implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-image.fits" + implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-image.fits)" must_exist: false restored_timeint: info: Restored images per time interval dtype: List[File] - implicit: "{current.prefix}-t[0-9][0-9][0-9][0-9]-image.fits" + implicit: "=GLOB({current.prefix}-t[0-9][0-9][0-9][0-9]-image.fits)" must_exist: false residual: dtype: List[File] - implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-residual.fits" + implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-residual.fits)" must_exist: false model: info: Model images dtype: List[File] - implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-model.fits" + implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-model.fits)" must_exist: false restored_mfs: info: Restored MFS image diff --git a/stimela/cargo/cab/wsclean_pol.yaml b/stimela/cargo/cab/wsclean_pol.yaml index a7b48d88..c74b01d6 100644 --- a/stimela/cargo/cab/wsclean_pol.yaml +++ b/stimela/cargo/cab/wsclean_pol.yaml @@ -27,44 +27,44 @@ outputs: dirty: info: Dirty images dtype: List[File] - implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-dirty.fits" + implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-dirty.fits)" must_exist: false restored: info: Restored images dtype: List[File] - implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-image.fits" + implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-image.fits)" must_exist: false residual: dtype: List[File] - implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-residual.fits" + implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-residual.fits)" must_exist: false model: info: Model images dtype: List[File] - implicit: "{current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-model.fits" + implicit: "=GLOB({current.prefix}-[0-9][0-9][0-9][0-9]-[IQUV]-model.fits)" must_exist: false restored_mfs_i: info: Restored MFS images dtype: File - implicit: "{current.prefix}-MFS-I-image.fits" + implicit: "=GLOB({current.prefix}-MFS-I-image.fits)" must_exist: false restored_mfs: info: Restored MFS images dtype: List[File] - implicit: "{current.prefix}-MFS-[IQUV]-image.fits" + implicit: "=GLOB({current.prefix}-MFS-[IQUV]-image.fits)" must_exist: false residual_mfs: info: Residual MFS images dtype: List[File] - implicit: "{current.prefix}-MFS-[IQUV]-residual.fits" + implicit: "=GLOB({current.prefix}-MFS-[IQUV]-residual.fits)" must_exist: false model_mfs: info: Model MFS images dtype: List[File] - implicit: "{current.prefix}-MFS-[IQUV]-model.fits" + implicit: "=GLOB({current.prefix}-MFS-[IQUV]-model.fits)" must_exist: false dirty_mfs: info: Dirty MFS images dtype: List[File] - implicit: "{current.prefix}-MFS-[IQUV]-dirty.fits" + implicit: "=GLOB({current.prefix}-MFS-[IQUV]-dirty.fits)" must_exist: false diff --git a/stimela/commands/run.py b/stimela/commands/run.py index 7c73e0b5..7448d637 100644 --- a/stimela/commands/run.py +++ b/stimela/commands/run.py @@ -19,43 +19,6 @@ from stimela.kitchen.recipe import Recipe, Step, join_quote from stimela.config import get_config_class -def print_schema_help(cargo_type, name, cargo, extra_defaults={}, omit_params={}): - print( -f""" - {cargo_type} name: {name} - Info: {cargo.info} -""") - def print_inputs_outputs(inputs_outputs): - for name, schema in inputs_outputs.items(): - line = f" {name}: {schema.dtype}" - default = extra_defaults.get(name) - if default is None: - default = schema.default - if default is not None: - line += f" = {default}" - line += "\n" - if schema.info: - line = f" {schema.info}\n" - print(line) - if cargo.inputs: - print(" Inputs:\n") - print_inputs_outputs(cargo.inputs) - else: - print(" Inputs: none\n") - if cargo.outputs: - print(" Outputs:\n") - print_inputs_outputs(cargo.outputs) - else: - print(" Outputs: none\n") - if type(cargo) is Recipe: - print("\n Steps") - if any(step.skip for step in cargo.steps.values()): - print(" including [skipped] steps") - step_names = [] - for label, step in cargo.steps.items(): - step_names.append(f"[{label}]" if step.skip else label) - print(f":\n {' '.join(step_names)}") - @cli.command("run", help=""" @@ -303,7 +266,7 @@ def run(what: str, parameters: List[str] = [], dry_run: bool = False, help: bool # apply restrictions, if any recipe.restrict_steps(tagged_steps, force_enable=False) - steps = [name for name, step in recipe.steps.items() if not step.skip] + steps = [name for name, step in recipe.steps.items() if not step._skip] log.info(f"will run the following recipe steps:") log.info(f" {' '.join(steps)}", extra=dict(color="GREEN")) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 3fb20724..7d474170 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -18,7 +18,7 @@ from scabha import validate import scabha.exceptions from scabha.exceptions import SubstitutionError, SubstitutionErrorList -from scabha.validate import Unresolved, join_quote +from scabha.validate import evaluate_and_substitute, Unresolved, join_quote from scabha.substitutions import SubstitutionNS, substitutions_from from scabha.cargo import Parameter, Cargo, Cab, Batch, ParameterCategory from scabha.types import File, Directory, MS @@ -38,7 +38,7 @@ class Step: recipe: Optional["Recipe"] = None # if not None, this step is a nested recipe params: Dict[str, Any] = EmptyDictDefault() # assigns parameter values info: Optional[str] = None # comment or info - skip: bool = False # if true, step is skipped unless explicitly enabled + skip: Optional[str] = None # if this evaluates to True, step is skipped tags: List[str] = EmptyListDefault() backend: Optional[str] = None # backend setting, overrides opts.config.backend if set @@ -50,8 +50,8 @@ class Step: assign_based_on: Dict[str, Any] = EmptyDictDefault() # assigns variables when step is executed based on value of another variable - _skip: Conditional = None # skip this step if conditional evaluates to true - _break_on: Conditional = None # break out (of parent recipe) if conditional evaluates to true + # _skip: Conditional = None # skip this step if conditional evaluates to true + # _break_on: Conditional = None # break out (of parent recipe) if conditional evaluates to true def __post_init__(self): self.fqname = self.fqname or self.name @@ -64,6 +64,15 @@ def __post_init__(self): self.params = OmegaConf.to_container(self.params) # after (pre)validation, this contains parameter values self.validated_params = None + # the "skip" attribute is reevaluated at runtime since it may contain substitutions, but if it's set to a bool + # constant, self._skip will be preset already + if self.skip in {"True", "true", "1"}: + self._skip = True + elif self.skip in {"False", "false", "0", "", None}: + self._skip = False + else: + # otherwise, self._skip stays at None, and will be re-evaluated at runtime + self._skip = None def summary(self, params=None, recursive=True, ignore_missing=False): return self.cargo and self.cargo.summary(recursive=recursive, @@ -143,8 +152,6 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): # init and/or update logger options logopts = (logopts if logopts is not None else self.config.opts.log).copy() - if 'log' in self.assign: - logopts.update(**self.assign.log) # update file logging if not recipe (a recipe will do it in its finalize() anyway, with its own substitions) if not self.recipe: @@ -183,29 +190,28 @@ def run(self, subst=None, batch=None): self.prevalidate(self.params) with stimelogging.declare_subtask(self.name) as subtask: + # evaluate the skip attribute (it can be a formula and/or a {}-substititon) + skip = self._skip + if self._skip is None and subst is not None: + skips = dict(skip=self.skip) + evaluate_and_substitute(skips, subst, subst.current, location=[self.fqname], ignore_subst_errors=False) + skip = skips["skip"] # Since prevalidation will have populated default values for potentially missing parameters, use those values # For parameters that aren't missing, use whatever value that was suplied params = self.validated_params.copy() params.update(**self.params) - # # However the unresolved ones should be - # params = self.validated_params - # for name, value in params.items(): - # if type(value) is Unresolved: - # params[name] = self.params[name] - # # params = self.params - skip_warned = False # becomes True when warnings are given self.log.debug(f"validating inputs {subst and list(subst.keys())}") validated = None try: - params = self.cargo.validate_inputs(params, loosely=self.skip, subst=subst) + params = self.cargo.validate_inputs(params, loosely=skip, subst=subst) validated = True except ScabhaBaseException as exc: - level = logging.WARNING if self.skip else logging.ERROR + level = logging.WARNING if skip else logging.ERROR if not exc.logged: if type(exc) is SubstitutionErrorList: self.log.log(level, f"unresolved {{}}-substitution(s):") @@ -225,7 +231,7 @@ def run(self, subst=None, batch=None): self.validated_params.update(**params) # log inputs - if validated and not self.skip: + if validated and not skip: self.log_summary(logging.INFO, "validated inputs", color="GREEN", ignore_missing=True) if subst is not None: subst.current = params @@ -241,7 +247,7 @@ def run(self, subst=None, batch=None): else: raise StepValidationError(f"invalid inputs: {join_quote(invalid)}", log=self.log) - if not self.skip: + if not skip: try: if type(self.cargo) is Recipe: self.cargo.backend = self.cargo.backend or self.backend @@ -256,12 +262,14 @@ def run(self, subst=None, batch=None): self.log.error(f"error running step: {exc}") exc.logged = True raise + else: + self.log.info("this step is being skipped") self.log.debug(f"validating outputs") validated = False try: - params = self.cargo.validate_outputs(params, loosely=self.skip, subst=subst) + params = self.cargo.validate_outputs(params, loosely=skip, subst=subst) validated = True except ScabhaBaseException as exc: level = logging.WARNING if self.skip else logging.ERROR @@ -274,7 +282,7 @@ def run(self, subst=None, batch=None): self.log.log(level, f"error validating outputs: {exc}") exc.logged = True # raise up, unless step is being skipped - if self.skip: + if skip: self.log.warning("since the step was skipped, this is not fatal") else: self.log_summary(level, "failed outputs", color="WARNING") @@ -289,7 +297,7 @@ def run(self, subst=None, batch=None): # bomb out if an output was invalid invalid = [name for name in self.invalid_params + self.unresolved_params if name in self.cargo.outputs] if invalid: - if self.skip: + if skip: self.log.warning(f"invalid outputs: {join_quote(invalid)}") self.log.warning("since the step was skipped, this is not fatal") else: @@ -404,53 +412,109 @@ def validate_assignments(self, assign, assign_based_on, location): # if key in io: # raise RecipeValidationError(f"'{location}.{assign_label}.{key}' clashes with an {io_label}") - def update_assignments(self, assign, assign_based_on, params, location=""): + def update_assignments(self, subst: SubstitutionNS, whose = None, params: Dict[str, Any] = {}, ignore_subst_errors: bool = False): + """Updates variable assignments, using the recipe's (or a step's) 'assign' and 'assign_based_on' sections. + Also updates the corresponding (recipe or step's) file logger. + + Args: + subst (SubstitutionNS): substitution namespace + whose (Step or None): if None, use recipe's (self) assignments, else use this step's + params (dict, optional): dictionary of parameters + + Raises: + AssignmentError: on errors + """ + whose = whose or self + # short-circuit out if nothong to do + if not whose.assign and not whose.assign_based_on: + return - def resolve_config_variable(key): + def resolve_config_variable(key, base=self.config): + """helper function to look up a key like a.b.c in a nested dict-like structure""" path = key.split('.') varname = path[-1] - section = self.config + section = base for element in path[:-1]: if element in section: section = section[element] else: - raise AssignmentError("{location}.assign_based_on.{basevar}: '{element}' in '{key}' is not a valid config section") + raise AssignmentError(f"{whose.fqname}.assign_based_on.{basevar}: '{element}' in '{key}' is not a valid config section") return section, varname - for basevar, value_list in assign_based_on.items(): + # split into config and non-config assignments + config_assign = OrderedDict((key, value) for key, value in whose.assign.items() if '.' in key) + assign = OrderedDict((key, value) for key, value in whose.assign.items() if '.' not in key) + + # merge non-config assignments into ns.recipe + if assign: + subst.recipe._merge_(assign) + evaluate_and_substitute(assign, subst, subst.recipe, location=[whose.fqname], ignore_subst_errors=ignore_subst_errors) + # accumulate anew below + assign = {} + + # now process assign_based_on entries + for basevar, value_list in whose.assign_based_on.items(): # make sure the base variable is defined - if basevar in assign: - value = str(assign[basevar]) - elif basevar in params: - value = str(params[basevar]) + # it will be in subst.recipe if it was assigned, or is an input + if basevar in subst.recipe: + value = str(subst.recipe[basevar]) elif basevar in self.inputs_outputs and self.inputs_outputs[basevar].default is not None: value = str(self.inputs_outputs[basevar].default) elif '.' in basevar: section, varname = resolve_config_variable(basevar) if varname not in section: - raise AssignmentError(f"{location}.assign_based_on.{basevar}: is not a valid config entry") + raise AssignmentError(f"{whose.fqname}.assign_based_on.{basevar}: is not a valid config entry") value = str(section[varname]) + else: + raise AssignmentError(f"{whose.fqname}.assign_based_on.{basevar} is not a known input or variable") # look up list of assignments assignments = value_list.get(value, value_list.get('DEFAULT')) if assignments is None: - raise AssignmentError(f"{location}.assign_based_on.{basevar}: unknown value '{value}', and no default defined") - # update assignments + raise AssignmentError(f"{whose.fqname}.assign_based_on.{basevar}: unknown value '{value}', and no default defined") + # update assignments (also inside ns.recipe) for key, value in assignments.items(): if key in self._protected_from_assign: self.log.debug(f"skipping protected assignment {key}={value}") + elif key in self.inputs_outputs: + self.log.debug(f"params assignment: {key}={value}") + params[key] = value + subst.recipe[key] = value + elif '.' not in key: + self.log.debug(f"variable assignment: {key}={value}") + assign[key] = value + subst.recipe[key] = value + self.log.debug(f"variable assignment: {key}={value}") else: - # vars with dots are config settings - if '.' in key: - section, varname = resolve_config_variable(key) - section[varname] = value - # vars without dots are local variables or parameters - else: - if key in self.inputs_outputs: - self.log.debug(f"params assignment: {key}={value}") - params[key] = value - else: - self.log.debug(f"variable assignment: {key}={value}") - self.assign[key] = value + config_assign[key] = value + + # merge any new assignments into ns.recipe + if assign: + subst.recipe._merge_(assign) + evaluate_and_substitute(assign, subst, subst.recipe, location=[whose.fqname], ignore_subst_errors=ignore_subst_errors) + + # propagate config assignments + logopts_changed = False + for key, value in config_assign.items(): + # log.opt are log options + if key.startswith("log."): + self.log.debug(f"log options assignment: {key}={value}") + whose.logopts[key.split('.', 1)[1]] = value + logopts_changed = True + # vars with dots are config settings + elif '.' in key: + self.log.debug(f"config assignment: {key}={value}") + # change system config + section, varname = resolve_config_variable(key) + section[varname] = value + # do the same for the subst dict. Note that if the above succeeded, then key + # is a valid config entry, so it will also be look-uppable in subst + section, varname = resolve_config_variable(key, base=subst.config) + section[varname] = value + + # propagate log options + if logopts_changed: + stimelogging.update_file_logger(whose.log, whose.logopts, nesting=whose.nesting, subst=subst, location=[whose.fqname]) + @property def finalized(self): @@ -461,11 +525,15 @@ def enable_step(self, label, enable=True): step = self.steps.get(label) if step is None: raise RecipeValidationError(f"unknown step {label}", log=self.log) - if step.skip and enable: - self.log.warning(f"enabling step '{label}' which was previously marked as skipped") - elif not step.skip and not enable: + if enable: + if step._skip is True: + self.log.warning(f"enabling step '{label}' which was previously marked as skipped") + elif step._skip is not False: + self.log.warning(f"enabling step '{label}' which was previously marked as potentially skipped ('{self.skip}')") + step.skip = self._skip = False + else: self.log.warning(f"will skip step '{label}'") - step.skip = not enable + step.skip = self._skip = True def restrict_steps(self, steps: List[str], force_enable=True): self.finalize() @@ -478,9 +546,9 @@ def restrict_steps(self, steps: List[str], force_enable=True): # apply skip flags for label, step in self.steps.items(): if label not in restrict_steps: - step.skip = True + step.skip = step._skip = True elif force_enable: - step.skip = False + step.skip = step._skip = False def add_step(self, step: Step, label: str = None): """Adds a step to the recipe. Label is auto-generated if not supplied @@ -625,22 +693,20 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): self.validate_assignments(step.assign, step.assign_based_on, f"{fqname}.{label}") # init and/or update logger options - logopts = (logopts if logopts is not None else config.opts.log).copy() - if 'log' in self.assign: - logopts.update(**self.assign.log) + self.logopts = (logopts if logopts is not None else config.opts.log).copy() # update file logger logsubst = SubstitutionNS(config=config, info=dict(fqname=fqname)) - stimelogging.update_file_logger(log, logopts, nesting=nesting, subst=logsubst, location=[self.fqname]) + stimelogging.update_file_logger(log, self.logopts, nesting=nesting, subst=logsubst, location=[self.fqname]) # call Cargo's finalize method - super().finalize(config, log=log, logopts=logopts, fqname=fqname, nesting=nesting) + super().finalize(config, log=log, logopts=self.logopts, fqname=fqname, nesting=nesting) # finalize steps for label, step in self.steps.items(): step_log = log.getChild(label) step_log.propagate = True - step.finalize(config, log=step_log, logopts=logopts, fqname=f"{fqname}.{label}", nesting=nesting+1) + step.finalize(config, log=step_log, logopts=self.logopts, fqname=f"{fqname}.{label}", nesting=nesting+1) # collect aliases self._alias_map = OrderedDict() @@ -709,21 +775,21 @@ def prevalidate(self, params: Optional[Dict[str, Any]], subst: Optional[Substitu self.log.debug("prevalidating recipe") errors = [] - # update assignments - self.update_assignments(self.assign, self.assign_based_on, params=params, location=self.fqname) - subst_outer = subst # outer dictionary is used to prevalidate our parameters subst = SubstitutionNS() - info = SubstitutionNS(fqname=self.fqname) + info = SubstitutionNS(fqname=self.fqname, label='', label_parts=[], suffix='') # mutable=False means these sub-namespaces are not subject to {}-substitutions subst._add_('info', info, nosubst=True) subst._add_('config', self.config, nosubst=True) subst._add_('steps', {}, nosubst=True) subst._add_('previous', {}, nosubst=True) - subst._add_('recipe', self.make_substitition_namespace(params=params, ns=self.assign)) + subst._add_('recipe', {}) subst.recipe._merge_(params) + # update assignments + self.update_assignments(subst, params=params, ignore_subst_errors=True) + # add for-loop variable to inputs, if expected there if self.for_loop is not None and self.for_loop.var in self.inputs: params[self.for_loop.var] = Unresolved("for-loop") @@ -857,7 +923,9 @@ def validate_inputs(self, params: Dict[str, Any], subst: Optional[SubstitutionNS info = SubstitutionNS(fqname=self.fqname) subst._add_('info', info, nosubst=True) subst._add_('config', self.config, nosubst=True) - subst._add_('recipe', self.make_substitition_namespace(params=params, ns=self.assign)) + subst._add_('recipe', self.make_substitition_namespace(params)) + # 'current' points to recipe's parameters initially, + subst.current = subst.recipe return Cargo.validate_inputs(self, params, subst=subst, loosely=loosely) @@ -909,14 +977,14 @@ def rich_help(self, tree, max_category=ParameterCategory.Optional): over = f"[bold]{self.for_loop.over}[/bold]" loop_tree.add(f"iterating [bold]{self.for_loop.var}[/bold] over {over}") if self.steps: - have_skips = any(step.skip for step in self.steps.values()) + have_skips = any(step._skip for step in self.steps.values()) steps_tree = tree.add(f"Steps ([italic]italicized[/italic] steps are skipped by default):" if have_skips else "Steps:") steps = [] table = rich.table.Table.grid("", "", "", padding=(0,2)) # , show_header=False, show_lines=False, box=rich.box.SIMPLE) steps_tree.add(table) for label, step in self.steps.items(): - style = "italic" if step.skip else "bold" + style = "italic" if step._skip else "bold" table.add_row(f"[{style}]{label}[/{style}]", step.info) else: steps_tree = tree.add("No recipe steps defined") @@ -940,12 +1008,13 @@ def _run(self, params) -> Dict[str, Any]: # set up substitution namespace subst = SubstitutionNS() - info = SubstitutionNS(fqname=self.fqname) + info = SubstitutionNS(fqname=self.fqname, label='', label_parts=[], suffix='') # nosubst=True means these sub-namespaces are not subject to {}-substitutions subst._add_('info', info, nosubst=True) + subst._add_('config', self.config, nosubst=True) subst._add_('steps', {}, nosubst=True) subst._add_('previous', {}, nosubst=True) - recipe_ns = self.make_substitition_namespace(params=params, ns=self.assign) + recipe_ns = self.make_substitition_namespace(params) subst._add_('recipe', recipe_ns) # merge in config sections, except "recipe" which clashes with our namespace @@ -958,12 +1027,8 @@ def _run(self, params) -> Dict[str, Any]: Recipe._root_recipe_ns = recipe_ns subst._add_('root', Recipe._root_recipe_ns) - logopts = self.config.opts.log.copy() - if 'log' in self.assign: - logopts.update(**self.assign.log) - - # update logfile name (since this may depend on substitutions) - stimelogging.update_file_logger(self.log, self.logopts, nesting=self.nesting, subst=subst, location=[self.fqname]) + # update variable assignments + self.update_assignments(subst, params=params) # Harmonise before running self._link_steps() @@ -992,34 +1057,28 @@ def loop_worker(inst, step, label, subst, count, iter_var): Needed for concurrency """ + # update step info + inst._prep_step(label, step, subst) + # if for-loop, assign new value if inst.for_loop: inst.log.info(f"for loop iteration {count}: {inst.for_loop.var} = {iter_var}") - # update variables - inst.assign[inst.for_loop.var] = iter_var + # update variable (in params, if expected there, else in assignments) + if inst.for_loop.var in inst.inputs_outputs: + params[inst.for_loop.var] = iter_var + else: + inst.assign[inst.for_loop.var] = iter_var + # update variable index inst.assign[f"{inst.for_loop.var}@index"] = count stimelogging.declare_subtask_attributes(f"{count+1}/{len(inst._for_loop_values)}") - inst.update_assignments(inst.assign, inst.assign_based_on, inst.fqname) - subst.recipe._merge_(inst.assign) - # update logfile name (since this may depend on substitutions) - stimelogging.update_file_logger(inst.log, inst.logopts, nesting=inst.nesting, subst=subst, location=[inst.fqname]) + # update assignments since for-loop variable will have changed + self.update_assignments(subst, params=params) - # merge in variable assignments and add step params as "current" namespace - self.update_assignments(step.assign, step.assign_based_on, f"{self.name}.{label}") - subst.recipe._merge_(step.assign) + # merge in step's assignments + inst.update_assignments(subst, whose=step, params=params) - # update info - inst._prep_step(label, step, subst) - # update log options again (based on assign.log which may have changed) - if 'log' in step.assign: - logopts.update(**step.assign.log) - - # update logfile name regardless (since this may depend on substitutions) - info.fqname = step.fqname - stimelogging.update_file_logger(step.log, step.logopts, nesting=step.nesting, subst=subst, location=[step.fqname]) - - inst.log.info(f"{'skipping' if step.skip else 'running'} step '{label}'") + inst.log.info(f"processing step '{label}'") try: #step_params = step.run(subst=subst.copy(), batch=batch) # make a copy of the subst dict since recipe might modify step_params = step.run(subst=subst.copy()) # make a copy of the subst dict since recipe might modify diff --git a/stimela/tests/test_loop_recipe.yml b/stimela/tests/test_loop_recipe.yml index 4c048344..8e10ee7b 100644 --- a/stimela/tests/test_loop_recipe.yml +++ b/stimela/tests/test_loop_recipe.yml @@ -82,6 +82,7 @@ cubical_image_loop: steps: calibrate: cab: cubical + skip: "=recipe.loop == 1" image-1: cab: myclean params: From 08187636cb2bb1d6034e7edc352fba5b3dcf9a44 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 30 Mar 2022 12:38:38 +0200 Subject: [PATCH 32/37] improved log message when skipping steps --- stimela/kitchen/recipe.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 7d474170..d64845da 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -263,7 +263,10 @@ def run(self, subst=None, batch=None): exc.logged = True raise else: - self.log.info("this step is being skipped") + if self._skip is None and subst is not None: + self.log.info("skipping step based on setting of '{self.skip}'") + else: + self.log.info("skipping step based on explicit setting") self.log.debug(f"validating outputs") validated = False From 8824def0adbdf1a6eae67e376064c856b554e293 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 30 Mar 2022 13:35:14 +0200 Subject: [PATCH 33/37] fixed step-skip conditionals --- stimela/kitchen/recipe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index d64845da..5b91f1f2 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -194,7 +194,7 @@ def run(self, subst=None, batch=None): skip = self._skip if self._skip is None and subst is not None: skips = dict(skip=self.skip) - evaluate_and_substitute(skips, subst, subst.current, location=[self.fqname], ignore_subst_errors=False) + skips = evaluate_and_substitute(skips, subst, subst.current, location=[self.fqname], ignore_subst_errors=False) skip = skips["skip"] # Since prevalidation will have populated default values for potentially missing parameters, use those values @@ -264,7 +264,7 @@ def run(self, subst=None, batch=None): raise else: if self._skip is None and subst is not None: - self.log.info("skipping step based on setting of '{self.skip}'") + self.log.info(f"skipping step based on setting of '{self.skip}'") else: self.log.info("skipping step based on explicit setting") From 2ed8d9e55da586f21648c069cec30a3f580254d4 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 30 Mar 2022 15:57:24 +0200 Subject: [PATCH 34/37] fixing population of defaults, and loops --- stimela/commands/run.py | 2 +- stimela/kitchen/recipe.py | 39 ++++++++++++++++++++++++++------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/stimela/commands/run.py b/stimela/commands/run.py index 7448d637..1425459f 100644 --- a/stimela/commands/run.py +++ b/stimela/commands/run.py @@ -271,7 +271,7 @@ def run(what: str, parameters: List[str] = [], dry_run: bool = False, help: bool log.info(f" {' '.join(steps)}", extra=dict(color="GREEN")) # warn user if som steps remain explicitly disabled - if any(recipe.steps[name].skip for name in tagged_steps): + if any(recipe.steps[name]._skip for name in tagged_steps): log.warning("note that some steps remain explicitly skipped") # in debug mode, pretty-print the recipe diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 5b91f1f2..c91f49f4 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -73,7 +73,7 @@ def __post_init__(self): else: # otherwise, self._skip stays at None, and will be re-evaluated at runtime self._skip = None - + def summary(self, params=None, recursive=True, ignore_missing=False): return self.cargo and self.cargo.summary(recursive=recursive, params=params or self.validated_params or self.params, ignore_missing=ignore_missing) @@ -161,6 +161,16 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): # finalize the cargo self.cargo.finalize(config, log=log, logopts=logopts, fqname=self.fqname, nesting=nesting) + # build dictionary of defaults from cargo + self.defaults = {name: schema.default for name, schema in self.cargo.inputs_outputs.items() + if schema.default is not None and not isinstance(schema.default, Unresolved) } + self.defaults.update(**self.cargo.defaults) + + # set missing parameters from defaults + for name, value in self.defaults.items(): + if name not in self.params: + self.params[name] = value + # set backend self.backend = self.backend or self.config.opts.backend @@ -168,7 +178,7 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): def prevalidate(self, subst: Optional[SubstitutionNS]=None): self.finalize() # validate cab or recipe - params = self.validated_params = self.cargo.prevalidate(self.params, subst) + params = self.validated_params = self.cargo.prevalidate(self.params.copy(), subst) self.log.debug(f"{self.cargo.name}: {len(self.missing_params)} missing, " f"{len(self.invalid_params)} invalid and " f"{len(self.unresolved_params)} unresolved parameters") @@ -447,13 +457,12 @@ def resolve_config_variable(key, base=self.config): # split into config and non-config assignments config_assign = OrderedDict((key, value) for key, value in whose.assign.items() if '.' in key) assign = OrderedDict((key, value) for key, value in whose.assign.items() if '.' not in key) + updated = False - # merge non-config assignments into ns.recipe + # merge non-config assignments into ns.recipe, resolve and substitute as appropriate if assign: subst.recipe._merge_(assign) evaluate_and_substitute(assign, subst, subst.recipe, location=[whose.fqname], ignore_subst_errors=ignore_subst_errors) - # accumulate anew below - assign = {} # now process assign_based_on entries for basevar, value_list in whose.assign_based_on.items(): @@ -474,6 +483,7 @@ def resolve_config_variable(key, base=self.config): assignments = value_list.get(value, value_list.get('DEFAULT')) if assignments is None: raise AssignmentError(f"{whose.fqname}.assign_based_on.{basevar}: unknown value '{value}', and no default defined") + updated = True # update assignments (also inside ns.recipe) for key, value in assignments.items(): if key in self._protected_from_assign: @@ -490,8 +500,8 @@ def resolve_config_variable(key, base=self.config): else: config_assign[key] = value - # merge any new assignments into ns.recipe - if assign: + # if anything changed, merge non-config assignments into ns.recipe, resolve and substitute as appropriate + if assign and updated: subst.recipe._merge_(assign) evaluate_and_substitute(assign, subst, subst.recipe, location=[whose.fqname], ignore_subst_errors=ignore_subst_errors) @@ -828,6 +838,9 @@ def prevalidate_self(params): def prevalidate_steps(): for label, step in self.steps.items(): self._prep_step(label, step, subst) + # update, since these may depend on step info + self.update_assignments(subst, params=params, ignore_subst_errors=True) + self.update_assignments(subst, whose=step, params=params, ignore_subst_errors=True) try: step_params = step.prevalidate(subst) @@ -889,7 +902,7 @@ def prevalidate_steps(): def validate_for_loop(self, params, strict=False): # in case of for loops, get list of values to be iterated over if self.for_loop is not None: - # if over != None (see finalize() above), list of values needs to be looked `up in inputs + # if over != None (see finalize() above), list of values needs to be looked up in inputs # if it is None, then an explicit list was supplied and is already in self._for_loop_values. if self.for_loop.over is not None: # check that it's legal @@ -903,7 +916,9 @@ def validate_for_loop(self, params, strict=False): raise ParameterValidationError(f"for_loop.over={self.for_loop.over} is unset") if strict and isinstance(values, Unresolved): raise ParameterValidationError(f"for_loop.over={self.for_loop.over} is unresolved") - if not isinstance(values, (list, tuple)): + if type(values) is ListConfig: + values = list(values) + elif not isinstance(values, (list, tuple)): values = [values] if self._for_loop_values is None: self.log.info(f"recipe is a for-loop with '{self.for_loop.var}' iterating over {len(values)} values") @@ -1074,11 +1089,9 @@ def loop_worker(inst, step, label, subst, count, iter_var): # update variable index inst.assign[f"{inst.for_loop.var}@index"] = count stimelogging.declare_subtask_attributes(f"{count+1}/{len(inst._for_loop_values)}") - # update assignments since for-loop variable will have changed - self.update_assignments(subst, params=params) - - # merge in step's assignments + # update and re-evaluate assignments + inst.update_assignments(subst, params=params) inst.update_assignments(subst, whose=step, params=params) inst.log.info(f"processing step '{label}'") From ce59ff0da4a1fd46baaaead7398b63e43acbfa87 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Wed, 30 Mar 2022 16:06:24 +0200 Subject: [PATCH 35/37] fixed prevalidation --- stimela/kitchen/recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index c91f49f4..6e8eca66 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -178,7 +178,7 @@ def finalize(self, config=None, log=None, logopts=None, fqname=None, nesting=0): def prevalidate(self, subst: Optional[SubstitutionNS]=None): self.finalize() # validate cab or recipe - params = self.validated_params = self.cargo.prevalidate(self.params.copy(), subst) + params = self.validated_params = self.cargo.prevalidate(self.params, subst) self.log.debug(f"{self.cargo.name}: {len(self.missing_params)} missing, " f"{len(self.invalid_params)} invalid and " f"{len(self.unresolved_params)} unresolved parameters") From 4b2479bffe257892214ae34d90418a1a4e0ece25 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Thu, 31 Mar 2022 11:40:13 +0200 Subject: [PATCH 36/37] scabha2 dep version bump --- setup.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8549a2f8..5a5205c3 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,11 @@ requirements = ["pyyaml", "nose>=1.3.7", "future-fstrings", - "scabha >= 0.6.1", # @ git+https://github.com/caracal-pipeline/scabha2", + "scabha >= 0.7.0", + ## OMS: not a fan of this: + # @ git+https://github.com/caracal-pipeline/scabha2", + ## ...because it interferes with running scabha2 off a dev branch (i.e. if you have a local dev install of scabha, + ## pip install stimela will blow it away and replace with master branch...) "ruamel.yaml", "munch", "omegaconf>=2.1pre1", From b1913ce5c7ee774ad46ac61c3931207d172af23f Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Tue, 5 Apr 2022 10:57:37 +0200 Subject: [PATCH 37/37] cleaned up imports and tweaked help formatting --- stimela/commands/run.py | 19 ++++++++++--------- stimela/kitchen/recipe.py | 3 +-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/stimela/commands/run.py b/stimela/commands/run.py index 1425459f..d5192208 100644 --- a/stimela/commands/run.py +++ b/stimela/commands/run.py @@ -1,19 +1,20 @@ -from collections import OrderedDict import dataclasses import itertools +import click +import logging +import os.path +import yaml +import sys + from datetime import datetime +from typing import List, Optional +from collections import OrderedDict +from omegaconf.omegaconf import OmegaConf, OmegaConfBaseException -from yaml.error import YAMLError +import stimela from scabha import configuratt from scabha.exceptions import ScabhaBaseException -from omegaconf.omegaconf import OmegaConf, OmegaConfBaseException -from stimela import stimelogging from stimela.config import ConfigExceptionTypes -import click -import logging -import os.path, yaml, sys, asyncio -from typing import List, Optional -import stimela from stimela import logger from stimela.main import cli from stimela.kitchen.recipe import Recipe, Step, join_quote diff --git a/stimela/kitchen/recipe.py b/stimela/kitchen/recipe.py index 6e8eca66..35501101 100644 --- a/stimela/kitchen/recipe.py +++ b/stimela/kitchen/recipe.py @@ -996,9 +996,8 @@ def rich_help(self, tree, max_category=ParameterCategory.Optional): loop_tree.add(f"iterating [bold]{self.for_loop.var}[/bold] over {over}") if self.steps: have_skips = any(step._skip for step in self.steps.values()) - steps_tree = tree.add(f"Steps ([italic]italicized[/italic] steps are skipped by default):" + steps_tree = tree.add(f"Steps (note [italic]some steps[/italic] are skipped by default):" if have_skips else "Steps:") - steps = [] table = rich.table.Table.grid("", "", "", padding=(0,2)) # , show_header=False, show_lines=False, box=rich.box.SIMPLE) steps_tree.add(table) for label, step in self.steps.items():