launch_shell_job: Add option to skip resolving of the command

By default, `launch_shell_job` resolves the command to the absolute filepath of the corresponding executable. This serves two purposes: checking that the command exists and increasing reproducibility. The executable that a relative command name points to on the remote may change over time. Using the absolute filepath reduces this risk, although it does not eliminate it, since the file at the absolute path can still be changed on the remote. This commit adds the `resolve_command` argument, which can be set to `False` to skip this step and keep the command exactly as specified.
sphuber · Feb 14, 2024 · d4ad9e7 · d4ad9e7
1 parent c439b5f
commit d4ad9e7
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 11 deletions.
diff --git a/docs/source/howto.rst b/docs/source/howto.rst
@@ -615,6 +615,33 @@ which prints ``some output``.
     The entry point will automatically be validated and wrapped in a :class:`aiida_shell.data.entry_point.EntryPointData`.
 
 
+.. _how-to:keep-command-path-relative:
+
+Keeping the command path relative
+=================================
+
+By default, :meth:`~aiida_shell.launch.launch_shell_job` automatically converts the provided command to the absolute filepath of the corresponding executable.
+This serves two purposes:
+
+1. It checks that the command exists on the specified computer.
+2. It increases the quality of the provenance.
+
+The executable that a relative command resolves to on the target computer can change as a function of the environment, or simply change over time.
+Storing the actual absolute filepath of the executable avoids this, although it remains of course vulnerable to the executable itself actually being changed over time.
+
+Nevertheless, there may be use-cases where the resolving of the command is not desirable.
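+To skip this step and keep the command as specified, set the ``resolve_command`` argument to ``False``.
+Note that an existing code for the same command and computer is reused as is, so each example below assumes that no code for ``date`` exists yet on the computer: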
+
+.. code-block:: python
+
+    from aiida_shell import launch_shell_job
+    results, node = launch_shell_job('date')
+    assert str(node.inputs.code.filepath_executable) == '/usr/bin/date'
+
+    results, node = launch_shell_job('date', resolve_command=False)
+    assert str(node.inputs.code.filepath_executable) == 'date'
+
+
 Customizing run environment
 ===========================
 

diff --git a/src/aiida_shell/launch.py b/src/aiida_shell/launch.py
@@ -28,6 +28,7 @@ def launch_shell_job(  # noqa: PLR0913
     parser: t.Callable[[Parser, pathlib.Path], dict[str, Data]] | str | None = None,
     metadata: dict[str, t.Any] | None = None,
     submit: bool = False,
+    resolve_command: bool = True,
 ) -> tuple[dict[str, Data], ProcessNode]:
     """Launch a :class:`aiida_shell.ShellJob` job for the given command.
 
@@ -45,8 +46,12 @@ def launch_shell_job(  # noqa: PLR0913
         callable.
     :param metadata: Optional dictionary of metadata inputs to be passed to the ``ShellJob``.
     :param submit: Boolean, if ``True`` will submit the job to the daemon instead of running in current interpreter.
+    :param resolve_command: Whether to resolve the command to the absolute path of the executable. If set to ``True``,
+        the ``which`` command is executed on the target computer to attempt and determine the absolute path. Otherwise,
+        the command is set as the ``filepath_executable`` attribute of the created ``AbstractCode`` instance.
     :raises TypeError: If the value specified for ``metadata.options.computer`` is not a ``Computer``.
-    :raises ValueError: If the absolute path of the command on the computer could not be determined.
+    :raises ValueError: If ``resolve_command=True`` and the absolute path of the command on the computer could not be
+        determined.
     :returns: The tuple of results dictionary and ``ProcessNode``, or just the ``ProcessNode`` if ``submit=True``. The
         results dictionary intentionally doesn't include the ``retrieved`` and ``remote_folder`` outputs as they are
         generated for each ``CalcJob`` and typically are not of interest to a user running ``launch_shell_job``. In
@@ -55,7 +60,7 @@ def launch_shell_job(  # noqa: PLR0913
     computer = (metadata or {}).get('options', {}).pop('computer', None)
 
     if isinstance(command, str):
-        code = prepare_code(command, computer)
+        code = prepare_code(command, computer, resolve_command)
     else:
         lang.type_check(command, AbstractCode)
         code = command
@@ -86,14 +91,18 @@ def launch_shell_job(  # noqa: PLR0913
     return {label: node for label, node in results.items() if label not in ('retrieved', 'remote_folder')}, node
 
 
-def prepare_code(command: str, computer: Computer | None = None) -> AbstractCode:
+def prepare_code(command: str, computer: Computer | None = None, resolve_command: bool = True) -> AbstractCode:
     """Prepare a code for the given command and computer.
 
-    This will automatically prepare the computer
+    This will automatically prepare the computer.
 
     :param command: The command that the code should represent. Can be the relative executable name or absolute path.
     :param computer: The computer on which the command should be run. If not defined the localhost will be used.
+    :param resolve_command: Whether to resolve the command to the absolute path of the executable. If set to ``True``,
+        the ``which`` command is executed on the target computer to attempt and determine the absolute path. Otherwise,
+        the command is set as the ``filepath_executable`` attribute of the created ``AbstractCode`` instance.
     :return: A :class:`aiida.orm.nodes.code.abstract.AbstractCode` instance.
+    :raises ValueError: If ``resolve_command=True`` and the code fails to determine the absolute path of the command.
     """
     computer = prepare_computer(computer)
     code_label = f'{command}@{computer.label}'
@@ -103,14 +112,17 @@ def prepare_code(command: str, computer: Computer | None = None) -> AbstractCode
     except exceptions.NotExistent as exception:
         LOGGER.info('No code exists yet for `%s`, creating it now.', code_label)
 
-        with computer.get_transport() as transport:
-            status, stdout, stderr = transport.exec_command_wait(f'which {command}')
-            executable = stdout.strip()
+        if resolve_command:
+            with computer.get_transport() as transport:
+                status, stdout, stderr = transport.exec_command_wait(f'which {command}')
+                executable = stdout.strip()
 
-            if status != 0:
-                raise ValueError(
-                    f'failed to determine the absolute path of the command on the computer: {stderr}'
-                ) from exception
+                if status != 0:
+                    raise ValueError(
+                        f'failed to determine the absolute path of the command on the computer: {stderr}'
+                    ) from exception
+        else:
+            executable = command
 
         code = ShellCode(  # type: ignore[assignment]
             label=command, computer=computer, filepath_executable=executable, default_calc_job_plugin='core.shell'

diff --git a/tests/test_launch.py b/tests/test_launch.py
@@ -239,6 +239,20 @@ def job_function():
     assert isinstance(results['stdout'], SinglefileData)
 
 
+@pytest.mark.parametrize(
+    'resolve_command, executable',
+    (
+        (True, '/usr/bin/date'),
+        (False, 'date'),
+    ),
+)
+@pytest.mark.usefixtures('aiida_profile_clean')
+def test_resolve_command(resolve_command, executable):
+    """Test the ``resolve_command`` argument."""
+    _, node = launch_shell_job('date', resolve_command=resolve_command)
+    assert str(node.inputs.code.filepath_executable) == executable
+
+
 def test_parser():
     """Test the ``parser`` argument."""