From 432f1eee58a34e6bc14ff2548c63922ca74aea7c Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 6 Oct 2023 17:00:09 +0100 Subject: [PATCH] refactor pytest fixtures to conftest.py. add adocstrings. --- tests/test_integration/conftest.py | 194 ++++++++++++ .../test_cellfinder_workflow.py | 287 +++++++----------- 2 files changed, 297 insertions(+), 184 deletions(-) create mode 100644 tests/test_integration/conftest.py diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py new file mode 100644 index 00000000..8ef61168 --- /dev/null +++ b/tests/test_integration/conftest.py @@ -0,0 +1,194 @@ +import json +from pathlib import Path + +import pooch +import pytest + +from brainglobe_workflows.cellfinder.cellfinder_main import ( + CellfinderConfig, +) + + +def make_config_dict_fetch_from_GIN(cellfinder_cache_dir): + """Generate a config dictionary with the required parameters + for the workflow + + The input data is fetched from GIN and downloaded to cellfinder_cache_dir. + The results are also saved in a timestamped output subdirectory under + cellfinder_cache_dir + + Parameters + ---------- + cellfinder_cache_dir : Path + Path to the directory where the downloaded input data will be unzipped, + and the output will be saved + + Returns + ------- + dict + dictionary with the required parameters for the workflow + """ + return { + "install_path": cellfinder_cache_dir, + "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", + "data_hash": ( + "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914" + ), + "extract_relative_dir": "cellfinder_test_data", # relative path + "signal_parent_dir": str( + cellfinder_cache_dir / "cellfinder_test_data" / "signal" + ), + "background_parent_dir": str( + cellfinder_cache_dir / "cellfinder_test_data" / "background" + ), + "output_path_basename": cellfinder_cache_dir / "cellfinder_output_", + "detected_cells_filename": "detected_cells.xml", + "voxel_sizes": [5, 2, 2], # microns + "start_plane": 0, + "end_plane": -1, + "trained_model": None, # if None, it will use a default model + "model_weights": None, + "model": "resnet50_tv", + "batch_size": 32, + "n_free_cpus": 2, + "network_voxel_sizes": [5, 1, 1], + "soma_diameter": 16, + "ball_xy_size": 6, + "ball_z_size": 15, + "ball_overlap_fraction": 0.6, + "log_sigma_size": 0.2, + "n_sds_above_mean_thresh": 10, + "soma_spread_factor": 1.4, + "max_cluster_size": 100000, + "cube_width": 50, + "cube_height": 50, + "cube_depth": 20, + "network_depth": "50", + } + + +def prep_json(obj): + """ + Returns a JSON encodable version of the object. + + It uses the JSON default encoder for all objects + except those of type Path. + + Parameters + ---------- + obj : _type_ + _description_ + + Returns + ------- + _type_ + JSON serializable version of input object + """ + if isinstance(obj, Path): + return str(obj) + else: + return json.JSONEncoder.default(obj) + + +@pytest.fixture(autouse=True) +def cellfinder_cache_dir(tmp_path): + """Create a .cellfinder_workflows directory + under a temporary pytest directory and return + its path. + + The temporary directory is available via pytest's tmp_path + fixture. A new temporary directory is created every function call + + Parameters + ---------- + tmp_path : Path + path to pytest-generated temporary directory + + Returns + ------- + Path + path to the created cellfinder_workflows cache directory + """ + + return Path(tmp_path) / ".cellfinder_workflows" + + +@pytest.fixture() +def path_to_config_fetch_GIN(tmp_path, cellfinder_cache_dir): + """Create an input config that fetches data from GIN and + return its path + + Parameters + ---------- + tmp_path : Path + path to a fresh pytest-generated temporary directory. The + generated config is saved here. + + cellfinder_cache_dir : Path + path to the cellfinder cache directory, where the paths + in the config should point to. + + Returns + ------- + input_config_path : Path + path to config file that fetches data from GIN + """ + # create config dict + config_dict = make_config_dict_fetch_from_GIN(cellfinder_cache_dir) + + # create a temp json file to dump config data + input_config_path = ( + tmp_path / "input_config.json" + ) # save it in a temp dir separate from cellfinder_cache_dir + + # save config data to json file + with open(input_config_path, "w") as js: + json.dump(config_dict, js, default=prep_json) + + # check json file exists + assert Path(input_config_path).is_file() + + return input_config_path + + +@pytest.fixture() +def path_to_config_fetch_local(path_to_config_fetch_GIN): + """Create an input config that points to local data and + return its path. + + The process is analogous to creating a config that + fetches data from GIN, except that here we download the data + from GIN prior to running the workflow. + + Parameters + ---------- + path_to_config_fetch_GIN : Path + path to a config file that fetches data from GIN + + Returns + ------- + path_to_config_fetch_GIN : Path + path to a config file that fetches data from GIN + """ + # create config that fetches data from GIN + # path_to_config_fetch_GIN + + # read into config class + with open(path_to_config_fetch_GIN) as cfg: + config_dict = json.load(cfg) + config = CellfinderConfig(**config_dict) + + # Download GIN data + pooch.retrieve( + url=config.data_url, + known_hash=config.data_hash, + path=config.install_path, # path to download zip to + progressbar=True, + processor=pooch.Unzip( + extract_dir=config.extract_relative_dir + # path to unzipped dir, *relative* to 'path' + ), + ) + + # return path to config json + return path_to_config_fetch_GIN diff --git a/tests/test_integration/test_cellfinder_workflow.py b/tests/test_integration/test_cellfinder_workflow.py index c6168c4b..d82293a4 100644 --- a/tests/test_integration/test_cellfinder_workflow.py +++ b/tests/test_integration/test_cellfinder_workflow.py @@ -3,196 +3,33 @@ import sys from pathlib import Path -import pooch -import pytest - from brainglobe_workflows.cellfinder.cellfinder_main import ( DEFAULT_JSON_CONFIG_PATH, CellfinderConfig, ) -## Utils -def make_config_dict_fetch_from_GIN(cellfinder_cache_dir): - """Generate a config dictionary with the required parameters - for the workflow - - The input data is fetched from GIN and downloaded to - the location provided by cellfinder_cache_dir. The results are - also saved in a timestamped output subdirectory under cellfinder_cache_dir - - Parameters - ---------- - cellfinder_cache_dir : _type_ - _description_ - - Returns - ------- - dict - dictionary with the required parameters for the workflow - """ - return { - "install_path": cellfinder_cache_dir, - "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", - "data_hash": ( - "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914" - ), - "extract_relative_dir": "cellfinder_test_data", # relative path - "signal_parent_dir": str( - cellfinder_cache_dir / "cellfinder_test_data" / "signal" - ), - "background_parent_dir": str( - cellfinder_cache_dir / "cellfinder_test_data" / "background" - ), - "output_path_basename": cellfinder_cache_dir / "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", - "voxel_sizes": [5, 2, 2], # microns - "start_plane": 0, - "end_plane": -1, - "trained_model": None, # if None, it will use a default model - "model_weights": None, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [5, 1, 1], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50", - } - - -# ensure all Paths are JSON serializable -def prep_json(obj): - if isinstance(obj, Path): - return str(obj) - else: - return json.JSONEncoder.default(obj) - - -def assert_outputs(path_to_config, parent_dir=""): - # load input config - # ATT! config.output_path is only defined after the workflow - # setup, because its name is timestamped - with open(path_to_config) as cfg: - config_dict = json.load(cfg) - config = CellfinderConfig(**config_dict) - - # check one output directory exists and - # it has expected output file inside it - output_path_timestamped = [ - x - for x in (Path(parent_dir) / config.output_path_basename).parent.glob( - "*" - ) - if x.is_dir() - and x.name.startswith(Path(config.output_path_basename).name) - ] - - assert len(output_path_timestamped) == 1 - assert (output_path_timestamped[0]).exists() - assert ( - output_path_timestamped[0] / config.detected_cells_filename - ).is_file() - - -### Fixtures -@pytest.fixture(autouse=True) -def cellfinder_cache_dir(tmp_path): - """Create a .cellfinder_workflows directory - under a temporary pytest directory. - - It uses pytest's tmp_path fixture so that all is cleared after the test. - A new temporary directory is created every function call (scope="function" - by default) - - Parameters - ---------- - tmp_path : _type_ - _description_ - - Returns - ------- - Path - path to the created cache directory - """ - - return Path(tmp_path) / ".cellfinder_workflows" +def test_run_with_default_config(tmp_path): + """Test workflow run with no command line arguments + If no command line arguments are provided, the default + config at brainglobe_workflows/cellfinder/default_config.json + should be used. -@pytest.fixture() -def path_to_config_fetch_GIN(tmp_path, cellfinder_cache_dir): - """Create an input config that fetches data from GIN and - return its path + After the workflow is run we check that: + - there are no errors (via returncode), + - the logs reflect the default config file was used, and + - a single output directory exists with the expected + output file inside it Parameters ---------- - tmp_path : _type_ - _description_ - cellfinder_cache_dir : _type_ - _description_ - - Returns - ------- - _type_ - _description_ + tmp_path : Path + path to a pytest-generated temporary directory. """ - # create config dict - config_dict = make_config_dict_fetch_from_GIN(cellfinder_cache_dir) - - # create a temp json file to dump config data - input_config_path = ( - tmp_path / "input_config.json" - ) # save it in a temp dir separate from cellfinder_cache_dir - - # save config data to json file - with open(input_config_path, "w") as js: - json.dump(config_dict, js, default=prep_json) - - # check json file exists - assert Path(input_config_path).is_file() - - return input_config_path - -@pytest.fixture() -def path_to_config_fetch_local(path_to_config_fetch_GIN): - # create config that fetches data from GIN - # path_to_config_fetch_GIN - - # read into config class - with open(path_to_config_fetch_GIN) as cfg: - config_dict = json.load(cfg) - config = CellfinderConfig(**config_dict) - - # Download GIN data - pooch.retrieve( - url=config.data_url, - known_hash=config.data_hash, - path=config.install_path, # path to download zip to - progressbar=True, - processor=pooch.Unzip( - extract_dir=config.extract_relative_dir - # path to unzipped dir, *relative* to 'path' - ), - ) - - # return path to config json - return path_to_config_fetch_GIN - - -### Tests -def test_run_with_default_config(tmp_path): - # run workflow with CLI and capture log - # with cwd = pytest tmp_path <------- + # run workflow with no CLI arguments, + # with cwd=tmp_path subprocess_output = subprocess.run( [ sys.executable, @@ -218,9 +55,24 @@ def test_run_with_default_config(tmp_path): assert_outputs(DEFAULT_JSON_CONFIG_PATH, tmp_path) -def test_run_with_config_GIN( +def test_run_with_GIN_data( path_to_config_fetch_GIN, ): + """Test workflow runs when passing a config that fetches data + from the GIN repository + + After the workflow is run we check that: + - there are no errors (via returncode), + - the logs reflect the input config file was used, + - the logs reflect the data was downloaded from GIN, and + - a single output directory exists with the expected + output file inside it + + Parameters + ---------- + tmp_path : Path + path to a pytest-generated temporary directory. + """ # run workflow with CLI and capture log subprocess_output = subprocess.run( [ @@ -251,15 +103,31 @@ def test_run_with_config_GIN( in subprocess_output.stdout ) - # check output directory - # check one output directory exists and has expected output file inside it + # check one output directory exists and + # has expected output file inside it assert_outputs(path_to_config_fetch_GIN) -def test_run_with_config_local( +def test_run_with_local_data( path_to_config_fetch_local, ): - # run workflow with CLI and capture log + """Test workflow runs when passing a config that uses + local data + + After the workflow is run we check that: + - there are no errors (via returncode), + - the logs reflect the input config file was used, + - the logs reflect the data was found locally, and + - a single output directory exists with the expected + output file inside it + + Parameters + ---------- + tmp_path : Path + path to a pytest-generated temporary directory. + """ + + # run workflow with CLI subprocess_output = subprocess.run( [ sys.executable, @@ -289,6 +157,57 @@ def test_run_with_config_local( in subprocess_output.stdout ) - # check output directory - # check one output directory exists and has expected output file inside it + # check one output directory exists and + # has expected output file inside it assert_outputs(path_to_config_fetch_local) + + +def assert_outputs(path_to_config, parent_dir=""): + """Helper function to determine whether the output is + as expected. + + It checks that: + - a single output directory exists, and + - the expected output file exists inside it + + Note that config.output_path is only defined after the workflow + setup is run, because its name is timestamped. Therefore, + we search for an output directory based on config.output_path_basename. + + Parameters + ---------- + path_to_config : Path + path to the input config used to generate the + output. + + parent_dir : str, optional + If the paths in the input config are expressed as + relative, the absolute path to their parent_dir + must be specified here. If the paths in the input + config are absolute, this input is not required. + By default "". + """ + + # load input config + # ATT! config.output_path is only defined after the workflow + # setup is run, because its name is timestamped! + with open(path_to_config) as cfg: + config_dict = json.load(cfg) + config = CellfinderConfig(**config_dict) + + # check one output directory exists and + # it has expected output file inside it + output_path_timestamped = [ + x + for x in (Path(parent_dir) / config.output_path_basename).parent.glob( + "*" + ) + if x.is_dir() + and x.name.startswith(Path(config.output_path_basename).name) + ] + + assert len(output_path_timestamped) == 1 + assert (output_path_timestamped[0]).exists() + assert ( + output_path_timestamped[0] / config.detected_cells_filename + ).is_file()