diff --git a/datalad_ria/patches/enabled.py b/datalad_ria/patches/enabled.py index d68bc044..d18a4713 100644 --- a/datalad_ria/patches/enabled.py +++ b/datalad_ria/patches/enabled.py @@ -1,3 +1,4 @@ from . import ( ssh_exec, + sshremoteio, ) diff --git a/datalad_ria/patches/sshremoteio.py b/datalad_ria/patches/sshremoteio.py new file mode 100644 index 00000000..cb8de10a --- /dev/null +++ b/datalad_ria/patches/sshremoteio.py @@ -0,0 +1,114 @@ +"""Enable `SSHRemoteIO` operation on Windows + +The original code has two problems. + +1. The ``cmd``-argument for the shell ssh-process, which is created by: + ``self.shell = subprocess.Popen(cmd, ...)`` is not correct, if ``self.ssh``i + is an instance of ``NoMultiplexSSHConnection``. + + The changes in this patch build the correct ``cmd``-argument by adding + additional arguments to ``cmd``, if `self.ssh` is an instance of + ``NoMultiplexSSHConnection``. More precisely, the arguments that are + required to open a "shell" in a ``NoMultiplexSSHConnection`` are stored in + ``NoMultiplexSSHConnection._ssh_open_args`` and not in + ``NoMultiplexSSHConnection._ssh_args``. This patch therefore provides + arguments from both lists, i.e. from ``_ssh_args`` and ``_ssh_open_args`` in + the call that opens a "shell", if ``self.ssh`` is an instance of + ``NoMultiplexSSHConnection``. + +2. The while-loop that waits to read ``b"RIA-REMOTE-LOGIN-END\\n"`` from the + shell ssh-process did not contain any error handling. That led to an + infinite loop in case that the shell ssh-process terminates without writing + ``b"RIA-REMOTE-LOGIN-END\\n"`` to its stdout, or in the case that the + stdout-pipeline of the shell ssh-process is closed. + + This patch introduces two checks into the while loop. One check for + termination of the ssh shell-process, and one check for a closed + stdout-pipeline of the ssh shell-process, i.e. reading an EOF from the + stdout-pipeline. If any of those two cases appears, an exception is raised. + +In addition, this patch modifies two comments. It adds a missing description of +the ``buffer_size``-parameter of ``SSHRemoteIO.__init__``to the doc-string, and +fixes the description of the condition in the comment on the use of +``DEFAULT_BUFFER_SIZE``. +""" + +import logging +import subprocess + +from datalad.distributed.ora_remote import ssh_manager +# we need to get this from elsewhere, the orginal code does local imports +from datalad.support.exceptions import CommandError +# we need this for a conditional that is not part of the original code +from datalad.support.sshconnector import NoMultiplexSSHConnection + +from datalad_next.utils.consts import COPY_BUFSIZE +from datalad_next.patches import apply_patch + +# use same logger as -core +lgr = logging.getLogger('datalad.customremotes.ria_remote') + + +DEFAULT_BUFFER_SIZE = COPY_BUFSIZE + + +# The method 'SSHRemoteIO__init__' is a patched version of +# 'datalad/distributed/ora-remote.py:SSHRemoteIO.__init___' +# from datalad@8a145bf432ae8931be7039c97ff602e53813d238 +def SSHRemoteIO__init__(self, host, buffer_size=DEFAULT_BUFFER_SIZE): + """ + Parameters + ---------- + host : str + SSH-accessible host(name) to perform remote IO operations + on. + buffer_size: int or None + The preferred buffer size + """ + + # the connection to the remote + # we don't open it yet, not yet clear if needed + self.ssh = ssh_manager.get_connection( + host, + use_remote_annex_bundle=False, + ) + self.ssh.open() + + # This is a PATCH: it extends ssh_args to contain all + # necessary parameters + ssh_args = self.ssh._ssh_args + if isinstance(self.ssh, NoMultiplexSSHConnection): + ssh_args.extend(self.ssh._ssh_open_args) + cmd = ['ssh'] + ssh_args + [self.ssh.sshri.as_str()] + + # open a remote shell + self.shell = subprocess.Popen(cmd, + stderr=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stdin=subprocess.PIPE) + # swallow login message(s): + self.shell.stdin.write(b"echo RIA-REMOTE-LOGIN-END\n") + self.shell.stdin.flush() + while True: + # This is a PATCH: detect a terminated shell-process + status = self.shell.poll() + if status not in (0, None): + raise CommandError(f'ssh shell process exited with {status}') + + line = self.shell.stdout.readline() + if line == b"RIA-REMOTE-LOGIN-END\n": + break + + # This is a PATCH: detect closing of stdout of the shell-process + if line == '': + raise RuntimeError(f'ssh shell process close stdout unexpectedly') + # TODO: Same for stderr? + + # make sure default is used if 0 or None was passed, too. + self.buffer_size = buffer_size if buffer_size else DEFAULT_BUFFER_SIZE + + +apply_patch( + 'datalad.distributed.ora_remote', 'SSHRemoteIO', '__init__', + SSHRemoteIO__init__, +) diff --git a/datalad_ria/tests/test_ssh_remote_io.py b/datalad_ria/tests/test_ssh_remote_io.py new file mode 100644 index 00000000..bf0fcd1b --- /dev/null +++ b/datalad_ria/tests/test_ssh_remote_io.py @@ -0,0 +1,11 @@ +from datalad.distributed.ora_remote import SSHRemoteIO + + +def test_SSHRemoteIO_basics(ria_sshserver_setup, ria_sshserver): + # basic smoke test, just login and use the abstraction to read a file + ssh = SSHRemoteIO( + 'ssh://{SSH_LOGIN}@{HOST}:{SSH_PORT}'.format(**ria_sshserver_setup) + ) + etcpasswd = ssh.read_file('/etc/passwd') + # we can assume that the remote /etc/passwd file is not empty + assert etcpasswd diff --git a/datalad_ria/tests/utils.py b/datalad_ria/tests/utils.py index fcb2cae3..bd26ec0f 100644 --- a/datalad_ria/tests/utils.py +++ b/datalad_ria/tests/utils.py @@ -38,6 +38,7 @@ def assert_ssh_access( subprocess.run( ssh_call + [ f"bash -c 'mkdir -p {path} && touch {path}/datalad-tests-probe'"], + stdin=subprocess.PIPE, check=True, ) if localpath: @@ -45,6 +46,7 @@ def assert_ssh_access( assert (Path(localpath) / 'datalad-tests-probe').exists() subprocess.run( ssh_call + [f"bash -c 'rm {path}/datalad-tests-probe'"], + stdin=subprocess.PIPE, check=True, ) if localpath: diff --git a/docs/source/patches.rst b/docs/source/patches.rst index 3fa0b330..72e021a7 100644 --- a/docs/source/patches.rst +++ b/docs/source/patches.rst @@ -6,3 +6,4 @@ DataLad patches :toctree: generated ssh_exec + sshremoteio