Commit: finalize import/export and add tests
Showing 6 changed files with 403 additions and 73 deletions.
medperf/commands/dataset/export_dataset.py (new file)
@@ -0,0 +1,60 @@
import os
from medperf.entities.dataset import Dataset
from medperf.utils import tar
import medperf.config as config
from medperf.exceptions import ExecutionError
import yaml


class ExportDataset:
    @classmethod
    def run(cls, dataset_id: str, output_path: str):
        export_dataset = cls(dataset_id, output_path)
        export_dataset.prepare()
        export_dataset.create_tar()

    def __init__(self, dataset_id: str, output_path: str):
        self.dataset_id = dataset_id
        self.output_path = os.path.join(output_path, dataset_id) + ".gz"
        self.folders_paths = []
        self.dataset = Dataset.get(self.dataset_id)
        self.dataset_storage = self.dataset.get_storage_path()
        # Name of the folder inside the archive that will contain the backup
        self.dataset_tar_folder = config.dataset_backup_foldername + self.dataset_id

    def _prepare_development_dataset(self):
        raw_data_paths = self.dataset.get_raw_paths()

        for folder in raw_data_paths:
            # Check that each raw data path exists and is not empty
            if not (os.path.exists(folder) and os.listdir(folder)):
                raise ExecutionError(f"Cannot find raw data paths at '{folder}'")
            self.folders_paths.append(folder)

        data_path, labels_path = raw_data_paths
        self.paths["data"] = os.path.basename(data_path)
        self.paths["labels"] = os.path.basename(labels_path)

    def prepare(self):
        dataset_path = os.path.join(self.dataset_storage, self.dataset_id)
        self.folders_paths.append(dataset_path)
        # The server name is stored in paths.yaml so that, on import, the local
        # server can be compared against the one the dataset was exported from.
        # paths.yaml also records what each backed-up folder points to.
        self.paths = {"server": config.server, "dataset": self.dataset_id}

        # If the dataset is in development, its raw data paths are needed as well.
        if self.dataset.state == "DEVELOPMENT":
            self._prepare_development_dataset()

        # paths.yaml is created in the medperf tmp directory
        paths_path = os.path.join(config.tmp_folder, config.backup_config_filename)
        with open(paths_path, "w") as f:
            yaml.dump(self.paths, f)

        self.folders_paths.append(paths_path)
        config.tmp_paths.append(paths_path)

    def create_tar(self):
        tar(self.output_path, self.folders_paths, self.dataset_tar_folder)
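For context, a minimal usage sketch follows. The dataset ID and output directory are illustrative values, not part of the commit, and the snippet assumes a dataset with that ID already exists in local storage.

# Minimal usage sketch (illustrative values; assumes dataset "1" exists locally).
from medperf.commands.dataset.export_dataset import ExportDataset

# Writes /tmp/backups/1.gz containing the dataset folder, the raw data and labels
# folders when the dataset is in DEVELOPMENT, and a paths.yaml whose keys are
# "server" and "dataset" (plus "data" and "labels" for DEVELOPMENT datasets).
ExportDataset.run("1", "/tmp/backups")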
Unit tests for ExportDataset (new file)
@@ -0,0 +1,97 @@
import os
from medperf.exceptions import ExecutionError
import pytest

from medperf.tests.mocks.dataset import TestDataset
from medperf.commands.dataset.export_dataset import ExportDataset


PATCH_EXPORT = "medperf.commands.dataset.export_dataset.{}"


@pytest.fixture
def dataset(mocker):
    dset = TestDataset(id=None, state="DEVELOPMENT")
    return dset


@pytest.fixture
def export_dataset(mocker, dataset):
    mocker.patch(PATCH_EXPORT.format("Dataset.get"), return_value=dataset)
    return ExportDataset("", "")


def test_export_fail_if_development_dataset_raw_paths_does_not_exist(
    mocker, export_dataset
):
    # Arrange
    mocker.patch(
        PATCH_EXPORT.format("Dataset.get_raw_paths"), return_value=["test", "test1"]
    )

    # Act & Assert
    with pytest.raises(ExecutionError):
        export_dataset.prepare()


def test_export_fail_if_development_dataset_raw_paths_are_empty(mocker, export_dataset):
    # Arrange
    mocker.patch(
        PATCH_EXPORT.format("Dataset.get_raw_paths"), return_value=["/test", "/test1"]
    )
    os.makedirs("/test")
    os.makedirs("/test1")

    # Act & Assert
    with pytest.raises(ExecutionError):
        export_dataset.prepare()


def test_export_if_development_dataset_length_of_yaml_paths_keys_equal_4(
    mocker, export_dataset, fs
):
    # Arrange
    mocker.patch(
        PATCH_EXPORT.format("Dataset.get_raw_paths"), return_value=["/test", "/test1"]
    )
    os.makedirs("/test")
    os.makedirs("/test1")
    fs.create_file("/test/testfile")
    fs.create_file("/test1/testfile")

    # Act
    export_dataset.prepare()

    # Assert
    assert len(export_dataset.paths.keys()) == 4


def test_export_if_operation_dataset_length_of_yaml_paths_keys_equal_2(export_dataset):
    # Arrange
    export_dataset.dataset.state = "OPERATION"

    # Act
    export_dataset.prepare()

    # Assert
    assert len(export_dataset.paths.keys()) == 2


def test_export_if_tar_gz_file_is_created_at_output_path(export_dataset):
    # Arrange
    export_dataset.dataset.state = "OPERATION"
    export_dataset.dataset_id = "1"
    export_dataset.output_path = f"/test/{export_dataset.dataset_id}.gz"
    os.makedirs("/test/")

    # Act
    export_dataset.create_tar()

    # Assert
    assert os.path.exists(export_dataset.output_path)
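As a possible follow-up (not part of this commit), a test could confirm that run() chains prepare() and create_tar(). A sketch, assuming the existing dataset fixture and PATCH_EXPORT constant:

# Hypothetical extra test: run() should call prepare() and then create_tar().
# Both methods are patched, so no real filesystem or archive is involved.
def test_run_prepares_and_creates_tar(mocker, dataset):
    # Arrange
    mocker.patch(PATCH_EXPORT.format("Dataset.get"), return_value=dataset)
    prepare_mock = mocker.patch.object(ExportDataset, "prepare")
    tar_mock = mocker.patch.object(ExportDataset, "create_tar")

    # Act
    ExportDataset.run("", "")

    # Assert
    prepare_mock.assert_called_once()
    tar_mock.assert_called_once()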