From b4b27802cf43bd80b1bd793386fb820881c38765 Mon Sep 17 00:00:00 2001
From: rhoadesScholar
Date: Fri, 9 Feb 2024 14:26:20 -0500
Subject: [PATCH 1/6] fix: 🐛 Fix broken dependencies for MacOS.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 setup.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index e0ac028a4..3e6f51064 100644
--- a/setup.py
+++ b/setup.py
@@ -32,10 +32,11 @@
         "funlib.math>=0.1",
         "funlib.geometry>=0.2",
         "mwatershed>=0.1",
-        "funlib.persistence>=0.1",
+        "funlib.persistence @ git+https://github.com/janelia-cellmap/funlib.persistence",
         "funlib.evaluate @ git+https://github.com/pattonw/funlib.evaluate",
         "gunpowder>=1.3",
-        "lsds>=0.1.3",
+        # "lsds>=0.1.3",
+        "lsds @ git+https://github.com/funkelab/lsd",
         "xarray",
         "cattrs",
         "numpy-indexed",

From f243c7c1c033f7ab1ef7cc2b40593a806587b9c9 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 16:47:04 -0500
Subject: [PATCH 2/6] styles fixes for mypy

---
 .../experiments/datasplits/datasets/arrays/tiff_array.py | 2 +-
 dacapo/experiments/model.py                              | 2 +-
 dacapo/predict.py                                        | 3 +--
 mypy.ini                                                 | 7 ++++++-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py b/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py
index e16ef26e0..ccdf50376 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py
@@ -56,7 +56,7 @@ def voxel_size(self) -> Coordinate:

     @lazy_property.LazyProperty
     def roi(self) -> Roi:
-        return Roi(self._offset * self.shape)
+        return Roi(self._offset, self.shape)

     @property
     def writable(self) -> bool:
diff --git a/dacapo/experiments/model.py b/dacapo/experiments/model.py
index fe1f8e7d5..8ca2b2b9e 100644
--- a/dacapo/experiments/model.py
+++ b/dacapo/experiments/model.py
@@ -46,7 +46,7 @@ def forward(self, x):
             result = self.eval_activation(result)
         return result

-    def compute_output_shape(self, input_shape: Coordinate) -> Coordinate:
+    def compute_output_shape(self, input_shape: Coordinate) -> Tuple[int, Coordinate]:
         """Compute the spatial shape (i.e., not accounting for channels and
         batch dimensions) of this model, when fed a tensor of the given spatial
         shape as input."""
diff --git a/dacapo/predict.py b/dacapo/predict.py
index 340517528..afe137fcb 100644
--- a/dacapo/predict.py
+++ b/dacapo/predict.py
@@ -24,7 +24,7 @@ def predict(
     num_cpu_workers: int = 4,
     compute_context: ComputeContext = LocalTorch(),
     output_roi: Optional[Roi] = None,
-    output_dtype: Optional[np.dtype] = np.float32,  # add necessary type conversions
+    output_dtype: np.dtype = np.float32, # type: ignore
     overwrite: bool = False,
 ):
     # get the model's input and output size
@@ -59,7 +59,6 @@
         model.num_out_channels,
         output_voxel_size,
         output_dtype,
-        overwrite=overwrite,
     )

     # create gunpowder keys
diff --git a/mypy.ini b/mypy.ini
index 722c11df8..d41c2b58b 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,6 +1,8 @@
 # Global options:

 [mypy]
+exclude = ^(dacapo/apply\.py|dacapo/cli\.py)$
+# TODO remove this after fixing all the mypy errors @jeff

 # Per-module options:

@@ -68,4 +70,7 @@
 ignore_missing_imports = True

 [mypy-mwatershed.*]
-ignore_missing_imports = True
\ No newline at end of file
+ignore_missing_imports = True
+
+[mypy-numpy_indexed.*]
+ignore_missing_imports = True

From cebc737c43c66f718eabcc7219253a0b529caae2 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 16:50:37 -0500
Subject: [PATCH 3/6] update git action, fix doc and no more publish

---
 .github/workflows/black.yaml   |  2 ++
 .github/workflows/docs.yaml    |  9 ++++-----
 .github/workflows/publish.yaml | 38 ----------------------------------
 .github/workflows/tests.yaml   |  3 +--
 4 files changed, 7 insertions(+), 45 deletions(-)
 delete mode 100644 .github/workflows/publish.yaml

diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml
index 533fd7c80..a9ebfdec7 100644
--- a/.github/workflows/black.yaml
+++ b/.github/workflows/black.yaml
@@ -1,5 +1,7 @@
 name: black-action
+
 on: [push, pull_request]
+
 jobs:
   linter_name:
     name: runner / black
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 5a84cc86b..d8d7b388d 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -1,8 +1,7 @@
-name: Pages
-on:
-  push:
-    branches:
-      - master
+name: Generate Pages
+
+on: [push, pull_request]
+
 jobs:
   docs:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
deleted file mode 100644
index 47d19b651..000000000
--- a/.github/workflows/publish.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# This workflow will upload a Python Package using Twine when a release is created
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
-
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
-name: Upload Python Package
-
-on:
-  push:
-    branches: [ "main" ]
-  pull_request:
-    branches: [ "main" ]
-
-jobs:
-  deploy:
-
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python
-      uses: actions/setup-python@v3
-      with:
-        python-version: '3.x'
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install build
-    - name: Build package
-      run: python -m build
-    - name: Publish package
-      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
-      with:
-        user: __token__
-        password: ${{ secrets.PIPY_PASSWORD }}
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 020ca3074..132ee4d28 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -1,7 +1,6 @@
 name: Test

-on:
-  push:
+on: [push, pull_request]

 jobs:
   test:

From 7feab6a7d6116ff8c768b0367d59f97f0e7f71d7 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 16:57:50 -0500
Subject: [PATCH 4/6] remove unfinished cli and apply from main

---
 dacapo/apply.py | 197 ++----------------------------------------------
 dacapo/cli.py   |  55 +++-----------
 mypy.ini        |   2 -
 3 files changed, 16 insertions(+), 238 deletions(-)

diff --git a/dacapo/apply.py b/dacapo/apply.py
index b33cffe46..8ada300dd 100644
--- a/dacapo/apply.py
+++ b/dacapo/apply.py
@@ -1,200 +1,13 @@
 import logging
-from typing import Optional
-from funlib.geometry import Roi, Coordinate
-import numpy as np
-from dacapo.experiments.datasplits.datasets.arrays.array import Array
-from dacapo.experiments.datasplits.datasets.dataset import Dataset
-from dacapo.experiments.run import Run
-
-from dacapo.experiments.tasks.post_processors.post_processor_parameters import (
-    PostProcessorParameters,
-)
-import dacapo.experiments.tasks.post_processors as post_processors
-from dacapo.store.array_store import LocalArrayIdentifier
-from dacapo.predict import predict
-from dacapo.compute_context import LocalTorch, ComputeContext
-from dacapo.experiments.datasplits.datasets.arrays import ZarrArray
-from dacapo.store import (
-    create_config_store,
-    create_weights_store,
-)
-
-from pathlib import Path

 logger = logging.getLogger(__name__)


-def apply(
-    run_name: str,
-    input_container: Path or str,
-    input_dataset: str,
-    output_path: Path or str,
-    validation_dataset: Optional[Dataset or str] = None,
-    criterion: Optional[str] = "voi",
-    iteration: Optional[int] = None,
-    parameters: Optional[PostProcessorParameters or str] = None,
-    roi: Optional[Roi or str] = None,
-    num_cpu_workers: int = 30,
-    output_dtype: Optional[np.dtype or str] = np.uint8,
-    compute_context: ComputeContext = LocalTorch(),
-    overwrite: bool = True,
-    file_format: str = "zarr",
-):
-    """Load weights and apply a model to a dataset. If iteration is None, the best iteration based on the criterion is used. If roi is None, the whole input dataset is used."""
-    if isinstance(output_dtype, str):
-        output_dtype = np.dtype(output_dtype)
-
-    if isinstance(roi, str):
-        start, end = zip(
-            *[
-                tuple(int(coord) for coord in axis.split(":"))
-                for axis in roi.strip("[]").split(",")
-            ]
-        )
-        roi = Roi(
-            Coordinate(start),
-            Coordinate(end) - Coordinate(start),
-        )
-
-    assert (validation_dataset is not None and isinstance(criterion, str)) or (
-        isinstance(iteration, int)
-    ), "Either validation_dataset and criterion, or iteration must be provided."
-
-    # retrieving run
-    logger.info("Loading run %s", run_name)
-    config_store = create_config_store()
-    run_config = config_store.retrieve_run_config(run_name)
-    run = Run(run_config)
-
-    # create weights store
-    weights_store = create_weights_store()
-
-    # load weights
-    if iteration is None:
-        # weights_store._load_best(run, criterion)
-        iteration = weights_store.retrieve_best(run_name, validation_dataset, criterion)
-    logger.info("Loading weights for iteration %i", iteration)
-    weights_store.retrieve_weights(run, iteration)  # shouldn't this be load_weights?
-
-    # find the best parameters
-    if isinstance(validation_dataset, str):
-        val_ds_name = validation_dataset
-        validation_dataset = [
-            dataset for dataset in run.datasplit.validate if dataset.name == val_ds_name
-        ][0]
-    logger.info("Finding best parameters for validation dataset %s", validation_dataset)
-    if parameters is None:
-        parameters = run.task.evaluator.get_overall_best_parameters(
-            validation_dataset, criterion
-        )
-        assert (
-            parameters is not None
-        ), "Unable to retieve parameters. Parameters must be provided explicitly."
-
-    elif isinstance(parameters, str):
-        try:
-            post_processor_name = parameters.split("(")[0]
-            post_processor_kwargs = parameters.split("(")[1].strip(")").split(",")
-            post_processor_kwargs = {
-                key.strip(): value.strip()
-                for key, value in [arg.split("=") for arg in post_processor_kwargs]
-            }
-            for key, value in post_processor_kwargs.items():
-                if value.isdigit():
-                    post_processor_kwargs[key] = int(value)
-                elif value.replace(".", "", 1).isdigit():
-                    post_processor_kwargs[key] = float(value)
-        except:
-            raise ValueError(
-                f"Could not parse parameters string {parameters}. Must be of the form 'post_processor_name(arg1=val1, arg2=val2, ...)'"
-            )
-        try:
-            parameters = getattr(post_processors, post_processor_name)(
-                **post_processor_kwargs
-            )
-        except Exception as e:
-            logger.error(
-                f"Could not instantiate post-processor {post_processor_name} with arguments {post_processor_kwargs}.",
-                exc_info=True,
-            )
-            raise e
-
-    assert isinstance(
-        parameters, PostProcessorParameters
-    ), "Parameters must be parsable to a PostProcessorParameters object."
-
-    # make array identifiers for input, predictions and outputs
-    input_array_identifier = LocalArrayIdentifier(input_container, input_dataset)
-    input_array = ZarrArray.open_from_array_identifier(input_array_identifier)
-    roi = roi.snap_to_grid(input_array.voxel_size, mode="grow").intersect(
-        input_array.roi
-    )
-    output_container = Path(
-        output_path,
-        "".join(Path(input_container).name.split(".")[:-1]) + f".{file_format}",
-    )
-    prediction_array_identifier = LocalArrayIdentifier(
-        output_container, f"prediction_{run_name}_{iteration}"
-    )
-    output_array_identifier = LocalArrayIdentifier(
-        output_container, f"output_{run_name}_{iteration}_{parameters}"
-    )
-
+def apply(run_name: str, iteration: int, dataset_name: str):
     logger.info(
-        "Applying best results from run %s at iteration %i to dataset %s",
-        run.name,
+        "Applying results from run %s at iteration %d to dataset %s",
+        run_name,
         iteration,
-        Path(input_container, input_dataset),
+        dataset_name,
     )
-    return apply_run(
-        run,
-        parameters,
-        input_array,
-        prediction_array_identifier,
-        output_array_identifier,
-        roi,
-        num_cpu_workers,
-        output_dtype,
-        compute_context,
-        overwrite,
-    )
-
-
-def apply_run(
-    run: Run,
-    parameters: PostProcessorParameters,
-    input_array: Array,
-    prediction_array_identifier: LocalArrayIdentifier,
-    output_array_identifier: LocalArrayIdentifier,
-    roi: Optional[Roi] = None,
-    num_cpu_workers: int = 30,
-    output_dtype: Optional[np.dtype] = np.uint8,
-    compute_context: ComputeContext = LocalTorch(),
-    overwrite: bool = True,
-):
-    """Apply the model to a dataset. If roi is None, the whole input dataset is used. Assumes model is already loaded."""
-    run.model.eval()
-
-    # render prediction dataset
-    logger.info("Predicting on dataset %s", prediction_array_identifier)
-    predict(
-        run.model,
-        input_array,
-        prediction_array_identifier,
-        output_roi=roi,
-        num_cpu_workers=num_cpu_workers,
-        output_dtype=output_dtype,
-        compute_context=compute_context,
-        overwrite=overwrite,
-    )
-
-    # post-process the output
-    logger.info("Post-processing output to dataset %s", output_array_identifier)
-    post_processor = run.task.post_processor
-    post_processor.set_prediction(prediction_array_identifier)
-    post_processor.process(
-        parameters, output_array_identifier, overwrite=overwrite, blockwise=True
-    )
-
-    logger.info("Done")
-    return
+    raise NotImplementedError("This function is not yet implemented.")
\ No newline at end of file
diff --git a/dacapo/cli.py b/dacapo/cli.py
index f97906508..732e74117 100644
--- a/dacapo/cli.py
+++ b/dacapo/cli.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
 import dacapo
 import click
 import logging
@@ -42,52 +40,21 @@ def validate(run_name, iteration):

 @cli.command()
 @click.option(
-    "-r", "--run-name", required=True, type=str, help="The name of the run to apply."
+    "-r", "--run-name", required=True, type=str, help="The name of the run to use."
 )
 @click.option(
-    "-ic",
-    "--input_container",
+    "-i",
+    "--iteration",
     required=True,
-    type=click.Path(exists=True, file_okay=False),
+    type=int,
+    help="The iteration weights and parameters to use.",
 )
-@click.option("-id", "--input_dataset", required=True, type=str)
-@click.option("-op", "--output_path", required=True, type=click.Path(file_okay=False))
-@click.option("-vd", "--validation_dataset", type=str, default=None)
-@click.option("-c", "--criterion", default="voi")
-@click.option("-i", "--iteration", type=int, default=None)
-@click.option("-p", "--parameters", type=str, default=None)
 @click.option(
-    "-roi",
-    "--roi",
+    "-r",
+    "--dataset",
+    required=True,
     type=str,
-    required=False,
-    help="The roi to predict on. Passed in as [lower:upper, lower:upper, ... ]",
+    help="The name of the dataset to apply the run to.",
 )
-@click.option("-w", "--num_cpu_workers", type=int, default=30)
-@click.option("-dt", "--output_dtype", type=str, default="uint8")
-def apply(
-    run_name: str,
-    input_container: str,
-    input_dataset: str,
-    output_path: str,
-    validation_dataset: Optional[str] = None,
-    criterion: Optional[str] = "voi",
-    iteration: Optional[int] = None,
-    parameters: Optional[str] = None,
-    roi: Optional[str] = None,
-    num_cpu_workers: int = 30,
-    output_dtype: Optional[str] = "uint8",
-):
-    dacapo.apply(
-        run_name,
-        input_container,
-        input_dataset,
-        output_path,
-        validation_dataset,
-        criterion,
-        iteration,
-        parameters,
-        roi,
-        num_cpu_workers,
-        output_dtype,
-    )
+def apply(run_name, iteration, dataset_name):
+    dacapo.apply(run_name, iteration, dataset_name)
\ No newline at end of file
diff --git a/mypy.ini b/mypy.ini
index d41c2b58b..aadc732e4 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,8 +1,6 @@
 # Global options:

 [mypy]
-exclude = ^(dacapo/apply\.py|dacapo/cli\.py)$
-# TODO remove this after fixing all the mypy errors @jeff

 # Per-module options:


From 5d77af06bc34118178aad9017f49621a9f150adc Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 17:14:27 -0500
Subject: [PATCH 5/6] fix test action, pytest 8.0.0 working

---
 requirements-dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 492c8e6f4..12afa83a4 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,5 @@
 black
 mypy
-pytest
+pytest==7.4.4
 pytest-cov
 pytest-lazy-fixture
\ No newline at end of file

From e46acf0c4cfeda2af02d8a9285890e9ddedfbb66 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 22:14:54 +0000
Subject: [PATCH 6/6] :art: Format Python code with psf/black

---
 dacapo/apply.py   | 2 +-
 dacapo/cli.py     | 2 +-
 dacapo/predict.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dacapo/apply.py b/dacapo/apply.py
index 8ada300dd..434002ef6 100644
--- a/dacapo/apply.py
+++ b/dacapo/apply.py
@@ -10,4 +10,4 @@ def apply(run_name: str, iteration: int, dataset_name: str):
         iteration,
         dataset_name,
     )
-    raise NotImplementedError("This function is not yet implemented.")
\ No newline at end of file
+    raise NotImplementedError("This function is not yet implemented.")
diff --git a/dacapo/cli.py b/dacapo/cli.py
index 732e74117..be59df0c0 100644
--- a/dacapo/cli.py
+++ b/dacapo/cli.py
@@ -57,4 +57,4 @@ def validate(run_name, iteration):
     help="The name of the dataset to apply the run to.",
 )
 def apply(run_name, iteration, dataset_name):
-    dacapo.apply(run_name, iteration, dataset_name)
\ No newline at end of file
+    dacapo.apply(run_name, iteration, dataset_name)
diff --git a/dacapo/predict.py b/dacapo/predict.py
index afe137fcb..1df4d779e 100644
--- a/dacapo/predict.py
+++ b/dacapo/predict.py
@@ -24,7 +24,7 @@ def predict(
     num_cpu_workers: int = 4,
     compute_context: ComputeContext = LocalTorch(),
     output_roi: Optional[Roi] = None,
-    output_dtype: np.dtype = np.float32, # type: ignore
+    output_dtype: np.dtype = np.float32,  # type: ignore
     overwrite: bool = False,
 ):
     # get the model's input and output size