Skip to content

Commit

Permalink
Simplify method command template (#19)
Browse files Browse the repository at this point in the history
This PR changes the method templates in two ways:

1. `inputs` is now called `parameters`. This is in line with the command
line names in `datalad make`.
2. `executable` and `arguments` are joined into a single
`command`-property. This change removes the somehow artificial
distinction between the command and the rest of the commandline. This
was especially confusing if the command should be built out of multiple
shell commands.
  • Loading branch information
christian-monch authored Oct 22, 2024
2 parents 755dc30 + 365db01 commit 45f9ee8
Show file tree
Hide file tree
Showing 13 changed files with 51 additions and 63 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/mypy-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ jobs:
if: steps.changed-py-files.outputs.any_changed == 'true'
run: |
# get any type stubs that mypy thinks it needs
hatch run types:mypy --install-types --non-interactive --ignore-missing-imports --follow-imports skip ${{ steps.changed-py-files.outputs.all_changed_files }}
# specify `--disable-error-code=import-untyped` until the
# datalad-packages have type stubs for all their modules.
hatch run types:mypy --install-types --non-interactive --disable-error-code=import-untyped --follow-imports skip ${{ steps.changed-py-files.outputs.all_changed_files }}
# run mypy on the modified files only, and do not even follow imports.
# this results is a fairly superficial test, but given the overall
# state of annotations, we strive to become more correct incrementally
# with focused error reports, rather than barfing a huge complaint
# that is unrelated to the changeset someone has been working on.
# run on the oldest supported Python version.
# specify `--ignore-missing-imports` until the datalad-packages have
# type stubs for all their modules.
hatch run types:mypy --python-version 3.11 --ignore-missing-imports --follow-imports skip --pretty --show-error-context ${{ steps.changed-py-files.outputs.all_changed_files }}
hatch run types:mypy --python-version 3.11 --disable-error-code=import-untyped --follow-imports skip --pretty --show-error-context ${{ steps.changed-py-files.outputs.all_changed_files }}
8 changes: 4 additions & 4 deletions .github/workflows/mypy-project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ jobs:
- name: Type check project
run: |
# get any type stubs that mypy thinks it needs
hatch run types:mypy --install-types --non-interactive --follow-imports skip datalad_remake
# specify `--disable-error-code=import-untyped` until the
# datalad-packages have type stubs for all their modules.
hatch run types:mypy --disable-error-code=import-untyped --install-types --non-interactive --follow-imports skip datalad_remake
# run mypy on the full project.
# run on the oldest supported Python version.
# specify `--ignore-missing-imports` until the datalad-packages have
# type stubs for all their modules.
hatch run types:mypy --python-version 3.11 --ignore-missing-imports --pretty --show-error-context datalad_remake
hatch run types:mypy --python-version 3.11 --disable-error-code=import-untyped --pretty --show-error-context datalad_remake
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,11 @@ Create the template directory and a template
```bash
> mkdir -p .datalad/make/methods
> cat > .datalad/make/methods/one-to-many <<EOF
inputs = ['first', 'second', 'output']
parameter = ['first', 'second', 'output']
use_shell = 'true'
executable = 'echo'
arguments = [
"content: {first} > '{output}-1.txt';",
"echo content: {second} > '{output}-2.txt'",
command = [
"echo content: {first} > '{output}-1.txt'; echo content: {second} > '{output}-2.txt'",
]
EOF
> datalad save -m "add `one-to-many` remake method"
Expand Down
2 changes: 2 additions & 0 deletions datalad_remake/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
__all__ = [
'__version__',
'command_suite',
'specification_dir',
'template_dir',
]


Expand Down
7 changes: 3 additions & 4 deletions datalad_remake/annexremotes/tests/test_hierarchies.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@
)

test_method = """
inputs = ['first', 'second', 'third']
parameters = ['first', 'second', 'third']
use_shell = 'true'
executable = 'echo'
arguments = [
"content: {first} > 'a.txt';",
command = [
"echo content: {first} > 'a.txt';",
"mkdir -p 'd2_subds0/d2_subds1/d2_subds2';",
"echo content: {second} > 'b.txt';",
"echo content: {third} > 'new.txt';",
Expand Down
7 changes: 2 additions & 5 deletions datalad_remake/annexremotes/tests/test_remake_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,11 @@
from ..remake_remote import RemakeRemote

template = """
inputs = ['content']
parameters = ['content']
use_shell = 'true'
executable = "echo"
arguments = [
"content: {content} > 'a.txt';",
]
command = ["echo content: {content} > 'a.txt'"]
"""


Expand Down
2 changes: 1 addition & 1 deletion datalad_remake/commands/make_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def execute(

# Run the computation in the worktree-directory
template_path = Path(template_dir) / template_name
worktree_ds.get(template_path)
worktree_ds.get(template_path, result_renderer='disabled')
compute(worktree, worktree / template_path, parameter)


Expand Down
2 changes: 1 addition & 1 deletion datalad_remake/commands/provision_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def provide(
# Get all input files in the worktree
with chdir(worktree_dataset.path):
for path in resolve_patterns(dataset, worktree_dataset, input_patterns):
worktree_dataset.get(path)
worktree_dataset.get(path, result_renderer='disabled')

yield get_status_dict(
action='provision',
Expand Down
7 changes: 3 additions & 4 deletions datalad_remake/commands/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
)

test_method = """
inputs = ['name', 'file']
parameters = ['name', 'file']
use_shell = 'true'
executable = 'echo'
arguments = ["Hello {name} > {file}"]
command = ["echo Hello {name} > {file}"]
"""

output_pattern = ['a.txt']
Expand Down Expand Up @@ -40,7 +39,7 @@ def test_speculative_computation(tmp_path, datalad_cfg):
)

# Perform the speculative computation
root_dataset.get('spec.txt')
root_dataset.get('spec.txt', result_renderer='disabled')
assert (root_dataset.pathobj / 'spec.txt').read_text() == 'Hello Robert\n'


Expand Down
32 changes: 15 additions & 17 deletions datalad_remake/utils/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
if TYPE_CHECKING:
from pathlib import Path

lgr = logging.getLogger('datalad.compute')
lgr = logging.getLogger('datalad.remake')


def substitute_string(
Expand Down Expand Up @@ -38,23 +38,23 @@ def get_substitutions(
template: dict[str, Any],
arguments: dict[str, str],
) -> dict[str, str]:
# Check the user specified inputs
inputs = template['inputs']
if len(inputs) != len(arguments.keys()):
msg = 'Template inputs and arguments have different lengths'
# Check the user specified parameters
parameters = template['parameters']
if len(parameters) != len(arguments.keys()):
msg = 'Method template parameters and arguments have different lengths'
raise ValueError(msg)
if not all(input_name in arguments for input_name in inputs):
if not all(param_name in arguments for param_name in parameters):
msg = (
f'Template inputs and arguments have different names: '
f'inputs: {inputs}, arguments: {arguments}'
f'Method template parameters and arguments have different names: '
f'parameters: {parameters}, arguments: {arguments}'
)
raise ValueError(msg)

if len(inputs) != len(set(inputs)):
msg = 'Template inputs contain duplicates'
if len(parameters) != len(set(parameters)):
msg = f'Method template parameters contain duplicates: {parameters}'
raise ValueError(msg)

return {input_name: arguments[input_name] for input_name in inputs}
return {param_name: arguments[param_name] for param_name in parameters}


def compute(
Expand All @@ -68,15 +68,13 @@ def compute(
substitutions = get_substitutions(template, compute_arguments)
substitutions['root_directory'] = str(root_directory)

substituted_executable = substitute_string(template['executable'], substitutions)
substituted_arguments = substitute_arguments(template, substitutions, 'arguments')
substituted_command = substitute_arguments(template, substitutions, 'command')

with contextlib.chdir(root_directory):
if template.get('use_shell', 'false') == 'true':
cmd = ' '.join([substituted_executable, *substituted_arguments])
cmd = ' '.join(substituted_command)
lgr.debug(f'compute: RUNNING: with shell=True: {cmd}')
subprocess.run(cmd, shell=True, check=True) # noqa: S602
else:
cmd_list = [substituted_executable, *substituted_arguments]
lgr.debug(f'compute: RUNNING: {cmd_list}')
subprocess.run(cmd_list, check=True)
lgr.debug(f'compute: RUNNING: {substituted_command}')
subprocess.run(substituted_command, check=True)
6 changes: 3 additions & 3 deletions examples/fmriprep docker/fmriprep-docker
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
#
# `datalad compute -I input.txt -O output.txt -P parameter.txt fmriprep_template`

inputs = ['input_dir', 'output_dir', 'participant_label', 'license_file']
parameters = ['input_dir', 'output_dir', 'participant_label', 'license_file']

use_shell = 'false'
executable = 'fmriprep-docker'

# Note: `{root_directory}` resolves to the directory of the dataset in which the
# computation was started with `datalad compute`.
arguments = [
command = [
'fmriprep-docker',
'{root_directory}/{input_dir}',
'{root_directory}/{output_dir}',
'participant',
Expand Down
8 changes: 4 additions & 4 deletions examples/fmriprep-singularity/fmriprep-singularity
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@
#
# `datalad make -I input.txt -O output.txt -P parameter.txt fmriprep-singularity`

inputs = ['container', 'input_dir', 'output_dir', 'participant_label', 'license_file']
parameters = ['container', 'input_dir', 'output_dir', 'participant_label', 'license_file']

use_shell = 'false'
executable = 'singularity'

# Note: `{root_directory}` resolves to the directory of the dataset in which the
# computation was started with `datalad make`.

arguments = [
command = [
'singularity',
'run', '{container}',
'{root_directory}/{input_dir}',
'{root_directory}/{output_dir}',
Expand All @@ -36,4 +36,4 @@ arguments = [
'--fs-no-reconall',
'--skip-bids-validation',
'--ignore', 'slicetiming',
]
]
17 changes: 6 additions & 11 deletions examples/one-to-many
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,17 @@
# each input variable. In this case the invocation could look like this:
# `datalad make -p first=bob -p second=alice -p output=name ... one-to-many`
#
inputs = ['first', 'second', 'output']
parameters = ['first', 'second', 'output']

# Use a shell to interpret `arguments`. By default, `use_shell` is 'false'.
#
use_shell = 'true'

# The name of the executable. This will be the prepended to the argument list
# given in `arguments`.
#
executable = 'echo'

# Arguments to the executable. The curly braces are placeholders for the
# input variables that were defined above. They will be replaced with the
# values provided in the parameter arguments of `datalad make`.
# The command line that should be executed. The curly braces are placeholders
# for the input variables that were defined above. They will be replaced with
# the values provided in the parameter arguments of `datalad make`.
#
arguments = [
"content: {first} > '{output}-1.txt';",
"echo content: {second} > '{output}-2.txt'",
command = [
"echo content: {first} > '{output}-1.txt'; echo content: {second} > '{output}-2.txt'",
]

0 comments on commit 45f9ee8

Please sign in to comment.