From d49216f7fd86fd3997411a668eb458bd650d6adf Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 13 Sep 2023 19:40:50 +0200 Subject: [PATCH 1/5] First crude smoke test for SSHRemoteIO See if the whole shebang of identityfile, custom username, with custom port still yields a working IO abstraction. --- datalad_ria/tests/test_ssh_remote_io.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 datalad_ria/tests/test_ssh_remote_io.py diff --git a/datalad_ria/tests/test_ssh_remote_io.py b/datalad_ria/tests/test_ssh_remote_io.py new file mode 100644 index 00000000..bf0fcd1b --- /dev/null +++ b/datalad_ria/tests/test_ssh_remote_io.py @@ -0,0 +1,11 @@ +from datalad.distributed.ora_remote import SSHRemoteIO + + +def test_SSHRemoteIO_basics(ria_sshserver_setup, ria_sshserver): + # basic smoke test, just login and use the abstraction to read a file + ssh = SSHRemoteIO( + 'ssh://{SSH_LOGIN}@{HOST}:{SSH_PORT}'.format(**ria_sshserver_setup) + ) + etcpasswd = ssh.read_file('/etc/passwd') + # we can assume that the remote /etc/passwd file is not empty + assert etcpasswd From f079c09325e9a0c681007db7ad1e7a11a897d2c4 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Mon, 18 Sep 2023 17:31:11 +0200 Subject: [PATCH 2/5] add a patch for SSHRmoteIO.__init__() This commit adds a patch that replaces SSHRemoteIO.__init__() with an udated version. The updated version uses proper arguments to open a the shell-ssh, if the ssh-connection is an instance of NoMultiplexSSHConnection. --- datalad_ria/patches/enabled.py | 1 + datalad_ria/patches/sshremoteio.py | 155 +++++++++++++++++++++++++++++ datalad_ria/tests/utils.py | 2 + docs/source/patches.rst | 1 + 4 files changed, 159 insertions(+) create mode 100644 datalad_ria/patches/sshremoteio.py diff --git a/datalad_ria/patches/enabled.py b/datalad_ria/patches/enabled.py index d68bc044..d18a4713 100644 --- a/datalad_ria/patches/enabled.py +++ b/datalad_ria/patches/enabled.py @@ -1,3 +1,4 @@ from . import ( ssh_exec, + sshremoteio, ) diff --git a/datalad_ria/patches/sshremoteio.py b/datalad_ria/patches/sshremoteio.py new file mode 100644 index 00000000..fb47e1cc --- /dev/null +++ b/datalad_ria/patches/sshremoteio.py @@ -0,0 +1,155 @@ +""" + +This patch fixes errors in the __init__ function of +datalad.distributed.ora_remote.py:SSHRemoteIO. + +The original code has two problems. + +1. The `cmd`-argument for the shell ssh-process, which is created by: + `self.shell = subprocess.Popen(cmd, ...)` + is not correct, if `self.ssh` is an instance of `NoMultiplexSSHConnection`. + + The changes in this patch build the correct `cmd`-argument by adding + additional arguments to `cmd`, if `self.ssh` is an instance of + `NoMultiplexSSHConnection`. More precisely, the arguments that are + required to open a "shell" in a `NoMultiplexSSHConnection` are stored + in `NoMultiplexSSHConnection._ssh_open_args` and not in + `NoMultiplexSSHConnection._ssh_args`. This patch therefore provides + arguments from both lists, i.e. from `_ssh_args` and `_ssh_open_args` in the + call that opens a "shell", if `self.ssh` is an instance of + `NoMultiplexSSHConnection`. + +2. The while-loop that waits to read `b"RIA-REMOTE-LOGIN-END\n"` from the + shell ssh-process did not contain any error handling. That led to an + infinite loop in case that the shell ssh-process terminates without + writing `b"RIA-REMOTE-LOGIN-END\n"` to its stdout, or in the case that + the stdout-pipeline of the shell ssh-process is closed. + + This patch introduces two checks into the while loop. One check for + termination of the ssh shell-process, and one check for a closed + stdout-pipeline of the ssh shell-process, i.e. reading an EOF from + the stdout-pipeline. If any of those two cases appears, an exception is + raised. + +In addition, this patch modifies two comments. It adds a missing description of +the `buffer_size`-parameter of `SSHRemoteIO.__init__`to the doc-string, and +fixes the description of the condition in the comment on the use of +`DEFAULT_BUFFER_SIZE`. + +""" + +import logging +import subprocess + +from datalad import ssh_manager +from datalad.support.exceptions import CommandError +from datalad.support.sshconnector import NoMultiplexSSHConnection +from datalad_next.utils.consts import COPY_BUFSIZE +from datalad_next.patches import apply_patch + +# use same logger as -core +lgr = logging.getLogger('datalad.customremotes.ria_remote') + + +DEFAULT_BUFFER_SIZE = COPY_BUFSIZE + + +# The method 'SSHRemoteIO__init__' is a patched version of +# 'datalad/distributed/ora-remote.py:SSHRemoteIO.__init___' +# from datalad@8a145bf432ae8931be7039c97ff602e53813d238 +# +# There were changes in multiple places of the original +# method. For reference, the following is a copy of the +# original method: +# +# def __init__(self, host, buffer_size=DEFAULT_BUFFER_SIZE): +# """ +# Parameters +# ---------- +# host : str +# SSH-accessible host(name) to perform remote IO operations +# on. +# """ +# +# # the connection to the remote +# # we don't open it yet, not yet clear if needed +# self.ssh = ssh_manager.get_connection( +# host, +# use_remote_annex_bundle=False, +# ) +# self.ssh.open() +# # open a remote shell +# cmd = ['ssh'] + self.ssh._ssh_args + [self.ssh.sshri.as_str()] +# self.shell = subprocess.Popen(cmd, +# stderr=subprocess.DEVNULL, +# stdout=subprocess.PIPE, +# stdin=subprocess.PIPE) +# # swallow login message(s): +# self.shell.stdin.write(b"echo RIA-REMOTE-LOGIN-END\n") +# self.shell.stdin.flush() +# while True: +# line = self.shell.stdout.readline() +# if line == b"RIA-REMOTE-LOGIN-END\n": +# break +# # TODO: Same for stderr? +# +# # make sure default is used when None was passed, too. +# self.buffer_size = buffer_size if buffer_size else DEFAULT_BUFFER_SIZE +# +def SSHRemoteIO__init__(self, host, buffer_size=DEFAULT_BUFFER_SIZE): + """ + Parameters + ---------- + host : str + SSH-accessible host(name) to perform remote IO operations + on. + buffer_size: int or None + The preferred buffer size + """ + + # the connection to the remote + # we don't open it yet, not yet clear if needed + self.ssh = ssh_manager.get_connection( + host, + use_remote_annex_bundle=False, + ) + self.ssh.open() + + # This is a PATCH: it extends ssh_args to contain all + # necessary parameters + ssh_args = self.ssh._ssh_args + if isinstance(self.ssh, NoMultiplexSSHConnection): + ssh_args.extend(self.ssh._ssh_open_args) + cmd = ['ssh'] + ssh_args + [self.ssh.sshri.as_str()] + + # open a remote shell + self.shell = subprocess.Popen(cmd, + stderr=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stdin=subprocess.PIPE) + # swallow login message(s): + self.shell.stdin.write(b"echo RIA-REMOTE-LOGIN-END\n") + self.shell.stdin.flush() + while True: + # This is a PATCH: detect a terminated shell-process + status = self.shell.poll() + if status not in (0, None): + raise CommandError(f'ssh shell process exited with {status}') + + line = self.shell.stdout.readline() + if line == b"RIA-REMOTE-LOGIN-END\n": + break + + # This is a PATCH: detect closing of stdout of the shell-process + if line == '': + raise RuntimeError(f'ssh shell process close stdout unexpectedly') + # TODO: Same for stderr? + + # make sure default is used if 0 or None was passed, too. + self.buffer_size = buffer_size if buffer_size else DEFAULT_BUFFER_SIZE + + +apply_patch( + 'datalad.distributed.ora_remote', 'SSHRemoteIO', '__init__', + SSHRemoteIO__init__, +) diff --git a/datalad_ria/tests/utils.py b/datalad_ria/tests/utils.py index fcb2cae3..bd26ec0f 100644 --- a/datalad_ria/tests/utils.py +++ b/datalad_ria/tests/utils.py @@ -38,6 +38,7 @@ def assert_ssh_access( subprocess.run( ssh_call + [ f"bash -c 'mkdir -p {path} && touch {path}/datalad-tests-probe'"], + stdin=subprocess.PIPE, check=True, ) if localpath: @@ -45,6 +46,7 @@ def assert_ssh_access( assert (Path(localpath) / 'datalad-tests-probe').exists() subprocess.run( ssh_call + [f"bash -c 'rm {path}/datalad-tests-probe'"], + stdin=subprocess.PIPE, check=True, ) if localpath: diff --git a/docs/source/patches.rst b/docs/source/patches.rst index 3fa0b330..72e021a7 100644 --- a/docs/source/patches.rst +++ b/docs/source/patches.rst @@ -6,3 +6,4 @@ DataLad patches :toctree: generated ssh_exec + sshremoteio From 6103e5beb242c9b778627048d0ea87baf4de85a4 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 20 Sep 2023 11:21:37 +0200 Subject: [PATCH 3/5] Remove code copy, made obsolete by documentation --- datalad_ria/patches/sshremoteio.py | 39 ------------------------------ 1 file changed, 39 deletions(-) diff --git a/datalad_ria/patches/sshremoteio.py b/datalad_ria/patches/sshremoteio.py index fb47e1cc..0419fc6e 100644 --- a/datalad_ria/patches/sshremoteio.py +++ b/datalad_ria/patches/sshremoteio.py @@ -57,45 +57,6 @@ # The method 'SSHRemoteIO__init__' is a patched version of # 'datalad/distributed/ora-remote.py:SSHRemoteIO.__init___' # from datalad@8a145bf432ae8931be7039c97ff602e53813d238 -# -# There were changes in multiple places of the original -# method. For reference, the following is a copy of the -# original method: -# -# def __init__(self, host, buffer_size=DEFAULT_BUFFER_SIZE): -# """ -# Parameters -# ---------- -# host : str -# SSH-accessible host(name) to perform remote IO operations -# on. -# """ -# -# # the connection to the remote -# # we don't open it yet, not yet clear if needed -# self.ssh = ssh_manager.get_connection( -# host, -# use_remote_annex_bundle=False, -# ) -# self.ssh.open() -# # open a remote shell -# cmd = ['ssh'] + self.ssh._ssh_args + [self.ssh.sshri.as_str()] -# self.shell = subprocess.Popen(cmd, -# stderr=subprocess.DEVNULL, -# stdout=subprocess.PIPE, -# stdin=subprocess.PIPE) -# # swallow login message(s): -# self.shell.stdin.write(b"echo RIA-REMOTE-LOGIN-END\n") -# self.shell.stdin.flush() -# while True: -# line = self.shell.stdout.readline() -# if line == b"RIA-REMOTE-LOGIN-END\n": -# break -# # TODO: Same for stderr? -# -# # make sure default is used when None was passed, too. -# self.buffer_size = buffer_size if buffer_size else DEFAULT_BUFFER_SIZE -# def SSHRemoteIO__init__(self, host, buffer_size=DEFAULT_BUFFER_SIZE): """ Parameters From 7e2006a567febe5a4d5b3dd6cefb17d34c1fc7f7 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 20 Sep 2023 11:28:52 +0200 Subject: [PATCH 4/5] Fix RST markup of patch description --- datalad_ria/patches/sshremoteio.py | 51 ++++++++++++++---------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/datalad_ria/patches/sshremoteio.py b/datalad_ria/patches/sshremoteio.py index 0419fc6e..20ad9a94 100644 --- a/datalad_ria/patches/sshremoteio.py +++ b/datalad_ria/patches/sshremoteio.py @@ -1,41 +1,36 @@ -""" - -This patch fixes errors in the __init__ function of -datalad.distributed.ora_remote.py:SSHRemoteIO. +"""Enable `SSHRemoteIO` operation on Windows The original code has two problems. -1. The `cmd`-argument for the shell ssh-process, which is created by: - `self.shell = subprocess.Popen(cmd, ...)` - is not correct, if `self.ssh` is an instance of `NoMultiplexSSHConnection`. - - The changes in this patch build the correct `cmd`-argument by adding - additional arguments to `cmd`, if `self.ssh` is an instance of - `NoMultiplexSSHConnection`. More precisely, the arguments that are - required to open a "shell" in a `NoMultiplexSSHConnection` are stored - in `NoMultiplexSSHConnection._ssh_open_args` and not in - `NoMultiplexSSHConnection._ssh_args`. This patch therefore provides - arguments from both lists, i.e. from `_ssh_args` and `_ssh_open_args` in the - call that opens a "shell", if `self.ssh` is an instance of - `NoMultiplexSSHConnection`. - -2. The while-loop that waits to read `b"RIA-REMOTE-LOGIN-END\n"` from the +1. The ``cmd``-argument for the shell ssh-process, which is created by: + ``self.shell = subprocess.Popen(cmd, ...)`` is not correct, if ``self.ssh``i + is an instance of ``NoMultiplexSSHConnection``. + + The changes in this patch build the correct ``cmd``-argument by adding + additional arguments to ``cmd``, if `self.ssh` is an instance of + ``NoMultiplexSSHConnection``. More precisely, the arguments that are + required to open a "shell" in a ``NoMultiplexSSHConnection`` are stored in + ``NoMultiplexSSHConnection._ssh_open_args`` and not in + ``NoMultiplexSSHConnection._ssh_args``. This patch therefore provides + arguments from both lists, i.e. from ``_ssh_args`` and ``_ssh_open_args`` in + the call that opens a "shell", if ``self.ssh`` is an instance of + ``NoMultiplexSSHConnection``. + +2. The while-loop that waits to read ``b"RIA-REMOTE-LOGIN-END\\n"`` from the shell ssh-process did not contain any error handling. That led to an - infinite loop in case that the shell ssh-process terminates without - writing `b"RIA-REMOTE-LOGIN-END\n"` to its stdout, or in the case that - the stdout-pipeline of the shell ssh-process is closed. + infinite loop in case that the shell ssh-process terminates without writing + ``b"RIA-REMOTE-LOGIN-END\\n"`` to its stdout, or in the case that the + stdout-pipeline of the shell ssh-process is closed. This patch introduces two checks into the while loop. One check for termination of the ssh shell-process, and one check for a closed - stdout-pipeline of the ssh shell-process, i.e. reading an EOF from - the stdout-pipeline. If any of those two cases appears, an exception is - raised. + stdout-pipeline of the ssh shell-process, i.e. reading an EOF from the + stdout-pipeline. If any of those two cases appears, an exception is raised. In addition, this patch modifies two comments. It adds a missing description of -the `buffer_size`-parameter of `SSHRemoteIO.__init__`to the doc-string, and +the ``buffer_size``-parameter of ``SSHRemoteIO.__init__``to the doc-string, and fixes the description of the condition in the comment on the use of -`DEFAULT_BUFFER_SIZE`. - +``DEFAULT_BUFFER_SIZE``. """ import logging From a102c53d7ad16eb506eedf7ee4ff63684bc401fd Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 20 Sep 2023 11:32:55 +0200 Subject: [PATCH 5/5] Bring patch imports in alignment with guidelines Document unavoidable deviations. --- datalad_ria/patches/sshremoteio.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datalad_ria/patches/sshremoteio.py b/datalad_ria/patches/sshremoteio.py index 20ad9a94..cb8de10a 100644 --- a/datalad_ria/patches/sshremoteio.py +++ b/datalad_ria/patches/sshremoteio.py @@ -36,9 +36,12 @@ import logging import subprocess -from datalad import ssh_manager +from datalad.distributed.ora_remote import ssh_manager +# we need to get this from elsewhere, the orginal code does local imports from datalad.support.exceptions import CommandError +# we need this for a conditional that is not part of the original code from datalad.support.sshconnector import NoMultiplexSSHConnection + from datalad_next.utils.consts import COPY_BUFSIZE from datalad_next.patches import apply_patch