Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First crude smoke test for SSHRemoteIO #69

Merged
merged 5 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions datalad_ria/patches/enabled.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from . import (
ssh_exec,
sshremoteio,
)
114 changes: 114 additions & 0 deletions datalad_ria/patches/sshremoteio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""Enable `SSHRemoteIO` operation on Windows

The original code has two problems.

1. The ``cmd``-argument for the shell ssh-process, which is created by:
``self.shell = subprocess.Popen(cmd, ...)`` is not correct, if ``self.ssh``i
is an instance of ``NoMultiplexSSHConnection``.

The changes in this patch build the correct ``cmd``-argument by adding
additional arguments to ``cmd``, if `self.ssh` is an instance of
``NoMultiplexSSHConnection``. More precisely, the arguments that are
required to open a "shell" in a ``NoMultiplexSSHConnection`` are stored in
``NoMultiplexSSHConnection._ssh_open_args`` and not in
``NoMultiplexSSHConnection._ssh_args``. This patch therefore provides
arguments from both lists, i.e. from ``_ssh_args`` and ``_ssh_open_args`` in
the call that opens a "shell", if ``self.ssh`` is an instance of
``NoMultiplexSSHConnection``.

2. The while-loop that waits to read ``b"RIA-REMOTE-LOGIN-END\\n"`` from the
shell ssh-process did not contain any error handling. That led to an
infinite loop in case that the shell ssh-process terminates without writing
``b"RIA-REMOTE-LOGIN-END\\n"`` to its stdout, or in the case that the
stdout-pipeline of the shell ssh-process is closed.

This patch introduces two checks into the while loop. One check for
termination of the ssh shell-process, and one check for a closed
stdout-pipeline of the ssh shell-process, i.e. reading an EOF from the
stdout-pipeline. If any of those two cases appears, an exception is raised.

In addition, this patch modifies two comments. It adds a missing description of
the ``buffer_size``-parameter of ``SSHRemoteIO.__init__``to the doc-string, and
fixes the description of the condition in the comment on the use of
``DEFAULT_BUFFER_SIZE``.
"""

import logging
import subprocess

from datalad.distributed.ora_remote import ssh_manager
# we need to get this from elsewhere, the orginal code does local imports
from datalad.support.exceptions import CommandError
# we need this for a conditional that is not part of the original code
from datalad.support.sshconnector import NoMultiplexSSHConnection

from datalad_next.utils.consts import COPY_BUFSIZE
from datalad_next.patches import apply_patch

# use same logger as -core
lgr = logging.getLogger('datalad.customremotes.ria_remote')


DEFAULT_BUFFER_SIZE = COPY_BUFSIZE


# The method 'SSHRemoteIO__init__' is a patched version of
# 'datalad/distributed/ora-remote.py:SSHRemoteIO.__init___'
# from datalad@8a145bf432ae8931be7039c97ff602e53813d238
def SSHRemoteIO__init__(self, host, buffer_size=DEFAULT_BUFFER_SIZE):
christian-monch marked this conversation as resolved.
Show resolved Hide resolved
"""
Parameters
----------
host : str
SSH-accessible host(name) to perform remote IO operations
on.
buffer_size: int or None
The preferred buffer size
"""

# the connection to the remote
# we don't open it yet, not yet clear if needed
self.ssh = ssh_manager.get_connection(
host,
use_remote_annex_bundle=False,
)
self.ssh.open()

# This is a PATCH: it extends ssh_args to contain all
# necessary parameters
ssh_args = self.ssh._ssh_args
if isinstance(self.ssh, NoMultiplexSSHConnection):
ssh_args.extend(self.ssh._ssh_open_args)
cmd = ['ssh'] + ssh_args + [self.ssh.sshri.as_str()]

# open a remote shell
self.shell = subprocess.Popen(cmd,
stderr=subprocess.DEVNULL,
stdout=subprocess.PIPE,
stdin=subprocess.PIPE)
# swallow login message(s):
self.shell.stdin.write(b"echo RIA-REMOTE-LOGIN-END\n")
self.shell.stdin.flush()
while True:
# This is a PATCH: detect a terminated shell-process
status = self.shell.poll()
if status not in (0, None):
raise CommandError(f'ssh shell process exited with {status}')

line = self.shell.stdout.readline()
if line == b"RIA-REMOTE-LOGIN-END\n":
break

# This is a PATCH: detect closing of stdout of the shell-process
if line == '':
raise RuntimeError(f'ssh shell process close stdout unexpectedly')
# TODO: Same for stderr?

# make sure default is used if 0 or None was passed, too.
self.buffer_size = buffer_size if buffer_size else DEFAULT_BUFFER_SIZE


apply_patch(
'datalad.distributed.ora_remote', 'SSHRemoteIO', '__init__',
SSHRemoteIO__init__,
)
11 changes: 11 additions & 0 deletions datalad_ria/tests/test_ssh_remote_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from datalad.distributed.ora_remote import SSHRemoteIO


def test_SSHRemoteIO_basics(ria_sshserver_setup, ria_sshserver):
# basic smoke test, just login and use the abstraction to read a file
ssh = SSHRemoteIO(
'ssh://{SSH_LOGIN}@{HOST}:{SSH_PORT}'.format(**ria_sshserver_setup)
)
etcpasswd = ssh.read_file('/etc/passwd')
# we can assume that the remote /etc/passwd file is not empty
assert etcpasswd
2 changes: 2 additions & 0 deletions datalad_ria/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,15 @@ def assert_ssh_access(
subprocess.run(
ssh_call + [
f"bash -c 'mkdir -p {path} && touch {path}/datalad-tests-probe'"],
stdin=subprocess.PIPE,
check=True,
)
if localpath:
# check if a given
assert (Path(localpath) / 'datalad-tests-probe').exists()
subprocess.run(
ssh_call + [f"bash -c 'rm {path}/datalad-tests-probe'"],
stdin=subprocess.PIPE,
check=True,
)
if localpath:
Expand Down
1 change: 1 addition & 0 deletions docs/source/patches.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ DataLad patches
:toctree: generated

ssh_exec
sshremoteio