Skip to content

Commit

Permalink
Fix test_ssh_file_transfer.py version carried over from other PR.
Browse files Browse the repository at this point in the history
  • Loading branch information
JoeZiminski committed Oct 20, 2023
1 parent 72b7c3c commit 87d2319
Showing 1 changed file with 152 additions and 164 deletions.
316 changes: 152 additions & 164 deletions tests/tests_integration/test_ssh_file_transfer.py
Original file line number Diff line number Diff line change
@@ -1,84 +1,114 @@
"""
"""
import copy
import glob
import shutil
import time
from pathlib import Path

import pandas as pd
import paramiko
import pytest
import ssh_test_utils
import test_utils

# from pytest import ssh_config
from file_conflicts_pathtable import get_pathtable

from datashuttle.utils import ssh

TEST_SSH = True # TODO: base on whether docker / singularity is installed.


# Argument sets shared by the transfer tests in this module. Each inner
# list is one value passed to a test through `pytest.mark.parametrize`.

# Values for the `sub_names` test argument.
PARAM_SUBS = [
["all"],
["all_sub"],
["all_non_sub"],
["sub-001"],
["sub-003_date-20231901"],
["sub-002", "all_non_sub"],
]
# Values for the `ses_names` test argument.
PARAM_SES = [
["all"],
["all_non_ses"],
["all_ses"],
["ses-001"],
["ses-002_random-key"],
["all_non_ses", "ses-001"],
]
# Values for the `datatype` test argument.
PARAM_DATATYPE = [
["all"],
["all_ses_level_non_datatype"],
["all_datatype"],
["behav"],
["ephys"],
["histology"],
["funcimg"],
["histology", "behav", "all_ses_level_non_datatype"],
]
from pytest import ssh_config


class TestFileTransfer:
@pytest.fixture(
scope="class",
params=[ # Set running SSH or local filesystem (see docstring).
False,
pytest.param(
True,
marks=pytest.mark.skipif(
ssh_config.TEST_SSH is False,
reason="TEST_SSH is set to False.",
),
),
],
)
def pathtable_and_project(self, tmpdir_factory):
""" """
def pathtable_and_project(self, request, tmpdir_factory):
"""
Create a project for SSH testing. Setup
the project as normal, and switch configs
to use SSH connection.
Although SSH is used for transfer, for SSH tests,
checking the created filepaths is always
done through the local filesystem for speed
and convenience. As such, the drive that is
SSH'd to must also be mounted, and the path
to the location SSH'd to must be supplied.
For speed, create the project once,
and all files to transfer. Then in the
test function, the folders are transferred.
Partial cleanup is done in the test function
i.e. deleting the central_path to which the
items have been transferred. This is achieved
by using "class" scope.
NOTES
-----
- Pytest params - The `params` key sets the
`params` attribute on the pytest `request` fixture.
This attribute is used to set the `testing_ssh` variable
to `True` or `False`. In the first run, this is set to
`False`, meaning local filesystem tests are run. In the
second run, this is set with a pytest parameter that is
`True` (i.e. SSH tests are run) but is skipped if `TEST_SSH`
in `ssh_config` (set in `conftest.py`) is `False`.
- For convenience, files are transferred
with SSH and then checked through the local filesystem
mount. This is significantly easier than checking
everything through SFTP. However, on Windows the
mounted filesystem is quite slow to update, taking
a few seconds after SSH transfer. This makes the
tests run very slowly. We can get rid
of this limitation on Linux.
"""
testing_ssh = request.param
tmp_path = tmpdir_factory.mktemp("test")

base_path = tmp_path / "test with space"
if testing_ssh:
base_path = ssh_config.FILESYSTEM_PATH
central_path = ssh_config.SERVER_PATH
else:
base_path = tmp_path / "test with space"
central_path = base_path
test_project_name = "test_file_conflicts"

project, cwd = test_utils.setup_project_fixture(
base_path, test_project_name
)

if testing_ssh:
ssh_test_utils.setup_project_for_ssh(
project,
test_utils.make_test_path(
central_path, test_project_name, "central"
),
ssh_config.CENTRAL_HOST_ID,
ssh_config.USERNAME,
)

# Initialise the SSH connection
ssh_test_utils.setup_hostkeys(project)
shutil.copy(ssh_config.SSH_KEY_PATH, project.cfg.file_path.parent)

pathtable = get_pathtable(project.cfg["local_path"])
self.create_all_pathtable_files(pathtable)
project.testing_ssh = testing_ssh

yield [pathtable, project]

test_utils.teardown_project(cwd, project)

@pytest.fixture(scope="class")
def ssh_setup(self, pathtable_and_project):
    """
    Class-scoped fixture preparing the shared project for SSH tests.

    Takes the `[pathtable, project]` pair from `pathtable_and_project`,
    calls `ssh_test_utils.build_docker_image()` and
    `ssh_test_utils.setup_hostkeys()` on the project, then uploads
    the whole project with `project.upload_all()`.

    Returns
    -------
    list
        `[pathtable, project]`, in the same order as received.
    """
    table, ssh_project = pathtable_and_project

    # Order matters: the image is built before host keys are set up,
    # and both happen before any data is uploaded.
    ssh_test_utils.build_docker_image(ssh_project)
    ssh_test_utils.setup_hostkeys(ssh_project)
    ssh_project.upload_all()

    return [table, ssh_project]
if testing_ssh:
for result in glob.glob(ssh_config.FILESYSTEM_PATH):
shutil.rmtree(result)

# -------------------------------------------------------------------------
# Utils
Expand All @@ -91,37 +121,101 @@ def central_from_local(self, path_):
# Test File Transfer - All Options
# -------------------------------------------------------------------------

@pytest.mark.parametrize("sub_names", PARAM_SUBS)
@pytest.mark.parametrize("ses_names", PARAM_SES)
@pytest.mark.parametrize("datatype", PARAM_DATATYPE)
@pytest.mark.parametrize(
"sub_names",
[
["all"],
["all_sub"],
["all_non_sub"],
["sub-001"],
["sub-003_date-20231901"],
["sub-002", "all_non_sub"],
],
)
@pytest.mark.parametrize(
"ses_names",
[
["all"],
["all_non_ses"],
["all_ses"],
["ses-001"],
["ses-002_random-key"],
["all_non_ses", "ses-001"],
],
)
@pytest.mark.parametrize(
"datatype",
[
["all"],
["all_ses_level_non_datatype"],
["all_datatype"],
["behav"],
["ephys"],
["histology"],
["funcimg"],
["histology", "behav", "all_ses_level_non_datatype"],
],
)
@pytest.mark.parametrize("upload_or_download", ["upload", "download"])
def test_combinations_filesystem_transfer(
def test_all_data_transfer_options(
self,
pathtable_and_project,
sub_names,
ses_names,
datatype,
upload_or_download,
):
""" """
"""
Parse the arguments to filter the pathtable, getting
the files expected to be transferred based on the arguments.
Note files in sub/ses/datatype folders must be handled
separately to those in non-sub, non-ses, non-datatype folders.
See test_utils.swap_local_and_central_paths() for the logic
on setting up and swapping local / central paths for
upload / download tests.
"""
pathtable, project = pathtable_and_project

transfer_function = test_utils.handle_upload_or_download(
project,
upload_or_download,
swap_last_folder_only=False,
swap_last_folder_only=project.testing_ssh,
)[0]

transfer_function(sub_names, ses_names, datatype, init_log=False)

if upload_or_download == "download":
test_utils.swap_local_and_central_paths(
project, swap_last_folder_only=False
project, swap_last_folder_only=project.testing_ssh
)

expected_transferred_paths = self.get_expected_transferred_paths(
pathtable, sub_names, ses_names, datatype
sub_names = self.parse_arguments(pathtable, sub_names, "sub")
ses_names = self.parse_arguments(pathtable, ses_names, "ses")
datatype = self.parse_arguments(pathtable, datatype, "datatype")

# Filter pathtable to get files that were expected
# to be transferred
(
sub_ses_dtype_arguments,
extra_arguments,
) = self.make_pathtable_search_filter(sub_names, ses_names, datatype)

datatype_folders = self.query_table(pathtable, sub_ses_dtype_arguments)
extra_folders = self.query_table(pathtable, extra_arguments)

expected_paths = pd.concat([datatype_folders, extra_folders])
expected_paths = expected_paths.drop_duplicates(subset="path")

central_base_paths = expected_paths.base_folder.map(
lambda x: str(x).replace("local", "central")
)
expected_transferred_paths = central_base_paths / expected_paths.path

# When transferring with SSH, there is a delay before
# the filesystem catches up
if project.testing_ssh:
time.sleep(0.5)

# Check what paths were actually moved
# (through the local filesystem), and test
Expand All @@ -143,116 +237,10 @@ def test_combinations_filesystem_transfer(
except FileNotFoundError:
pass

@pytest.mark.parametrize("sub_names", PARAM_SUBS)
@pytest.mark.parametrize("ses_names", PARAM_SES)
@pytest.mark.parametrize("datatype", PARAM_DATATYPE)
def test_combinations_ssh_transfer(
    self,
    ssh_setup,
    sub_names,
    ses_names,
    datatype,
):
    """
    Upload, then download, one sub / ses / datatype combination
    over SSH and check the transferred files match those expected
    from the pathtable.

    The upload is directed to a temporary "tmp" folder under the
    configured central path, and the download to a temporary "tmp"
    folder under the configured local path. Both temporary folders
    are removed after checking and the original config paths are
    restored. Paths are compared from the "rawdata" folder onwards
    so local and central locations can be compared directly.
    """
    pathtable, project = ssh_setup

    # Upload phase: point the central path at a temporary folder.
    true_central_path = project.cfg["central_path"]
    tmp_central_path = project.cfg["central_path"] / "tmp"
    project.update_config("central_path", tmp_central_path)

    project.upload(sub_names, ses_names, datatype, init_log=False)

    expected_transferred_paths = self.get_expected_transferred_paths(
        pathtable, sub_names, ses_names, datatype
    )

    transferred_files = ssh_test_utils.recursive_search_central(project)

    paths_to_transferred_files = self.remove_path_before_rawdata(
        transferred_files
    )

    expected_transferred_paths_ = self.remove_path_before_rawdata(
        expected_transferred_paths
    )

    assert sorted(paths_to_transferred_files) == sorted(
        expected_transferred_paths_
    )

    with paramiko.SSHClient() as client:
        ssh.connect_client(client, project.cfg)
        _, stdout, _ = client.exec_command(
            f"rm -rf {(tmp_central_path).as_posix()}"
        )  # TODO: own function as need to do on teardown)
        # `exec_command` returns immediately. Block until the remote
        # command has exited so the folder is removed before the
        # connection is closed and the download phase starts.
        stdout.channel.recv_exit_status()

    # Download phase: restore the central path and point the local
    # path at a temporary folder.
    true_local_path = project.cfg["local_path"]
    tmp_local_path = project.cfg["local_path"] / "tmp"
    tmp_local_path.mkdir()
    project.update_config("local_path", tmp_local_path)
    project.update_config("central_path", true_central_path)

    project.download(
        sub_names, ses_names, datatype, init_log=False
    )  # TODO: why is this connecting so many times? [during search - make issue]

    all_transferred = list((tmp_local_path / "rawdata").glob("**/*"))
    all_transferred = [
        path_ for path_ in all_transferred if path_.is_file()
    ]

    paths_to_transferred_files = self.remove_path_before_rawdata(
        all_transferred
    )

    assert sorted(paths_to_transferred_files) == sorted(
        expected_transferred_paths_
    )

    shutil.rmtree(tmp_local_path)
    project.update_config("local_path", true_local_path)

# ---------------------------------------------------------------------------------------------------------------
# Utils
# ---------------------------------------------------------------------------------------------------------------

def get_expected_transferred_paths(
    self, pathtable, sub_names, ses_names, datatype
):
    """
    Return the paths expected to be transferred for the passed
    `sub_names`, `ses_names` and `datatype` arguments.

    The arguments are parsed against the pathtable, turned into two
    search filters, and the rows matching either filter are combined
    (dropping rows with a duplicated "path"). Each row's
    `base_folder` then has "local" replaced with "central" and is
    joined with the row's `path`.
    """
    subs = self.parse_arguments(pathtable, sub_names, "sub")
    sessions = self.parse_arguments(pathtable, ses_names, "ses")
    datatypes = self.parse_arguments(pathtable, datatype, "datatype")

    # Build the two filters selecting the rows that should transfer.
    search_filters = self.make_pathtable_search_filter(
        subs, sessions, datatypes
    )
    sub_ses_dtype_arguments, extra_arguments = search_filters

    matched_rows = pd.concat(
        [
            self.query_table(pathtable, sub_ses_dtype_arguments),
            self.query_table(pathtable, extra_arguments),
        ]
    ).drop_duplicates(subset="path")

    central_bases = matched_rows.base_folder.map(
        lambda folder: str(folder).replace("local", "central")
    )

    return central_bases / matched_rows.path

def remove_path_before_rawdata(self, list_of_paths):
    """
    Strip everything before the first "rawdata" component from
    each path in `list_of_paths`.

    Returns a list of `Path` objects, each starting at "rawdata",
    in the same order as the input. A path with no "rawdata"
    component raises `ValueError`.
    """

    def _from_rawdata(path_):  # TODO: rename all filenames
        components = Path(path_).parts
        start = components.index("rawdata")
        return Path(*components[start:])

    return [_from_rawdata(path_) for path_ in list_of_paths]

def query_table(self, pathtable, arguments):
"""
Search the table for arguments, return empty
Expand Down

0 comments on commit 87d2319

Please sign in to comment.