From 55a3892eb0432244403bf3a02b10bd65d548850c Mon Sep 17 00:00:00 2001
From: David Ackerman
Date: Fri, 9 Feb 2024 14:46:19 -0500
Subject: [PATCH 1/6] include and use more biases during watershed post
 processing of affinities

---
 .../post_processors/watershed_post_processor.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py
index 8fa6104bc..1a7c4627b 100644
--- a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py
+++ b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py
@@ -24,7 +24,7 @@ def enumerate_parameters(self):
         """Enumerate all possible parameters of this post-processor. Should
         return instances of ``PostProcessorParameters``."""
 
-        for i, bias in enumerate([0.1, 0.5, 0.9]):
+        for i, bias in enumerate([0.1, 0.25, 0.5, 0.75, 0.9]):
             yield WatershedPostProcessorParameters(id=i, bias=bias)
 
     def set_prediction(self, prediction_array_identifier):
@@ -44,9 +44,9 @@ def process(self, parameters, output_array_identifier):
         # if a previous segmentation is provided, it must have a "grid graph"
         # in its metadata.
         pred_data = self.prediction_array[self.prediction_array.roi]
-        affs = pred_data[: len(self.offsets)]
+        affs = pred_data[: len(self.offsets)].astype(np.float64)
         segmentation = mws.agglom(
-            affs - 0.5,
+            affs - parameters.bias,
             self.offsets,
         )
         # filter fragments
@@ -59,12 +59,17 @@ def process(self, parameters, output_array_identifier):
         for fragment, mean in zip(
             fragment_ids, measurements.mean(average_affs, segmentation, fragment_ids)
         ):
-            if mean < 0.5:
+            if mean < parameters.bias:
                 filtered_fragments.append(fragment)
 
         filtered_fragments = np.array(filtered_fragments, dtype=segmentation.dtype)
         replace = np.zeros_like(filtered_fragments)
-        segmentation = npi.remap(segmentation, filtered_fragments, replace)
+
+        # DGA: had to add in flatten and reshape since remap (in particular indices) didn't seem to work with ndarrays for the input
+        if filtered_fragments.size > 0:
+            segmentation = npi.remap(
+                segmentation.flatten(), filtered_fragments, replace
+            ).reshape(segmentation.shape)
 
         output_array[self.prediction_array.roi] = segmentation
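
A word on the bias parameter, which now does double duty in the patch above: mws.agglom follows the mutex watershed sign convention, so subtracting the bias recenters the affinities such that values above the bias act as attractive (merge) edges and values below it as repulsive (split) edges; the same bias is then reused as the mean-affinity cutoff when filtering fragments. A minimal numpy sketch of both semantics follows (toy values only; mws.agglom itself is not called, and npi.remap is used with the same flatten/reshape workaround as in the patch):

    import numpy as np
    import numpy_indexed as npi

    # Toy affinities in [0, 1]; in the patch these come from the prediction array.
    affs = np.array([0.15, 0.3, 0.6, 0.85])
    for bias in [0.1, 0.25, 0.5, 0.75, 0.9]:
        shifted = affs - bias
        # Positive entries act as attractive (merge) edges for mws.agglom,
        # negative entries as repulsive (split) edges, so a higher bias
        # means fewer merges and smaller fragments.
        print(f"bias={bias}: {(shifted > 0).sum()} attractive edges")

    # The flatten/reshape workaround from the patch: the author found
    # npi.remap unreliable on n-d input, so the label array is flattened
    # first and reshaped afterwards.
    segmentation = np.array([[1, 1, 2], [3, 2, 2]])
    filtered_fragments = np.array([2])           # fragments below the cutoff
    replace = np.zeros_like(filtered_fragments)  # remap them to background 0
    segmentation = npi.remap(
        segmentation.flatten(), filtered_fragments, replace
    ).reshape(segmentation.shape)
    print(segmentation)  # [[1 1 0], [3 0 0]]
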
From 58c7abe1469d99c1152e058994d95f185e10cafa Mon Sep 17 00:00:00 2001
From: David Ackerman
Date: Fri, 9 Feb 2024 14:47:21 -0500
Subject: [PATCH 2/6] include weighting argument for affinities+lsd loss

---
 dacapo/experiments/tasks/affinities_task.py |  8 ++------
 .../tasks/affinities_task_config.py         | 18 ++----------------
 .../tasks/losses/affinities_loss.py         |  5 +++--
 3 files changed, 7 insertions(+), 24 deletions(-)

diff --git a/dacapo/experiments/tasks/affinities_task.py b/dacapo/experiments/tasks/affinities_task.py
index 4a1b8cc4a..5f4ba82b3 100644
--- a/dacapo/experiments/tasks/affinities_task.py
+++ b/dacapo/experiments/tasks/affinities_task.py
@@ -12,12 +12,8 @@ def __init__(self, task_config):
         """Create a `DummyTask` from a `DummyTaskConfig`."""
 
         self.predictor = AffinitiesPredictor(
-            neighborhood=task_config.neighborhood,
-            lsds=task_config.lsds,
-            num_voxels=task_config.num_voxels,
-            downsample_lsds=task_config.downsample_lsds,
-            grow_boundary_iterations=task_config.grow_boundary_iterations,
+            neighborhood=task_config.neighborhood, lsds=task_config.lsds
         )
-        self.loss = AffinitiesLoss(len(task_config.neighborhood))
+        self.loss = AffinitiesLoss(len(task_config.neighborhood), task_config.lsds_to_affs_weight_ratio)
         self.post_processor = WatershedPostProcessor(offsets=task_config.neighborhood)
         self.evaluator = InstanceEvaluator()

diff --git a/dacapo/experiments/tasks/affinities_task_config.py b/dacapo/experiments/tasks/affinities_task_config.py
index 0a94db79d..a50c2141e 100644
--- a/dacapo/experiments/tasks/affinities_task_config.py
+++ b/dacapo/experiments/tasks/affinities_task_config.py
@@ -30,23 +30,9 @@ class AffinitiesTaskConfig(TaskConfig):
             "It has been shown that lsds as an auxiliary task can help affinity predictions."
         },
     )
-    num_voxels: int = attr.ib(
-        default=20,
-        metadata={
-            "help_text": "The number of voxels to use for the gaussian sigma when computing lsds."
-        },
-    )
-    downsample_lsds: int = attr.ib(
+    lsds_to_affs_weight_ratio: float = attr.ib(
         default=1,
         metadata={
-            "help_text": "The amount to downsample the lsds. "
-            "This is useful for speeding up training and inference."
-        },
-    )
-    grow_boundary_iterations: int = attr.ib(
-        default=0,
-        metadata={
-            "help_text": "The number of iterations to run the grow boundaries algorithm. "
-            "This is useful for refining the boundaries of the affinities, and reducing merging of adjacent objects."
+            "help_text": "If training with lsds, set how much they should be weighted compared to affs."
         },
     )

diff --git a/dacapo/experiments/tasks/losses/affinities_loss.py b/dacapo/experiments/tasks/losses/affinities_loss.py
index 65ada8843..74fc7fe67 100644
--- a/dacapo/experiments/tasks/losses/affinities_loss.py
+++ b/dacapo/experiments/tasks/losses/affinities_loss.py
@@ -3,8 +3,9 @@
 
 
 class AffinitiesLoss(Loss):
-    def __init__(self, num_affinities: int):
+    def __init__(self, num_affinities: int, lsds_to_affs_weight_ratio: float):
         self.num_affinities = num_affinities
+        self.lsds_to_affs_weight_ratio = lsds_to_affs_weight_ratio
 
     def compute(self, prediction, target, weight):
         affs, affs_target, affs_weight = (
@@ -21,7 +22,7 @@ def compute(self, prediction, target, weight):
         return (
             torch.nn.BCEWithLogitsLoss(reduction="none")(affs, affs_target)
             * affs_weight
-        ).mean() + (
+        ).mean() + self.lsds_to_affs_weight_ratio * (
             torch.nn.MSELoss(reduction="none")(torch.nn.Sigmoid()(aux), aux_target)
             * aux_weight
         ).mean()
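
The effect of the new lsds_to_affs_weight_ratio argument is easiest to see with the loss written out standalone. The sketch below mirrors the compute method from the diff above; the tensor names are hypothetical, since in DaCapo the affinity and auxiliary LSD channels are sliced out of a single prediction tensor using num_affinities:

    import torch

    def combined_loss(
        affs, affs_target, affs_weight,
        lsds, lsds_target, lsds_weight,
        lsds_to_affs_weight_ratio=1.0,
    ):
        # Affinity term: weighted binary cross-entropy on raw logits.
        affs_term = (
            torch.nn.BCEWithLogitsLoss(reduction="none")(affs, affs_target)
            * affs_weight
        ).mean()
        # Auxiliary LSD term: weighted MSE on sigmoid-activated predictions,
        # scaled by the new ratio (>1 emphasizes LSDs, <1 de-emphasizes them).
        lsds_term = (
            torch.nn.MSELoss(reduction="none")(torch.sigmoid(lsds), lsds_target)
            * lsds_weight
        ).mean()
        return affs_term + lsds_to_affs_weight_ratio * lsds_term
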
From ce71fb5b6b4957401d1dfd48239535d175f62463 Mon Sep 17 00:00:00 2001
From: David Ackerman
Date: Fri, 9 Feb 2024 14:53:57 -0500
Subject: [PATCH 3/6] make predictor node optional

---
 .../experiments/trainers/gunpowder_trainer.py | 23 +++++++++++--------
 .../trainers/gunpowder_trainer_config.py      |  5 ++++
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py
index efec630f0..ef5a6bf75 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer.py
@@ -42,6 +42,9 @@ def __init__(self, trainer_config):
         self.mask_integral_downsample_factor = 4
         self.clip_raw = trainer_config.clip_raw
 
+        # Testing out if calculating multiple times and multiplying is necessary
+        self.add_predictor_nodes_to_dataset = trainer_config.add_predictor_nodes_to_dataset
+
         self.scheduler = None
 
     def create_optimizer(self, model):
@@ -146,13 +149,14 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None):
             for augment in self.augments:
                 dataset_source += augment.node(raw_key, gt_key, mask_key)
 
-            # Add predictor nodes to dataset_source
-            dataset_source += DaCapoTargetFilter(
-                task.predictor,
-                gt_key=gt_key,
-                weights_key=dataset_weight_key,
-                mask_key=mask_key,
-            )
+            if self.add_predictor_nodes_to_dataset:
+                # Add predictor nodes to dataset_source
+                dataset_source += DaCapoTargetFilter(
+                    task.predictor,
+                    gt_key=gt_key,
+                    weights_key=dataset_weight_key,
+                    mask_key=mask_key,
+                )
 
             dataset_sources.append(dataset_source)
         pipeline = tuple(dataset_sources) + gp.RandomProvider(weights)
@@ -162,11 +166,12 @@
             task.predictor,
             gt_key=gt_key,
             target_key=target_key,
-            weights_key=datasets_weight_key,
+            weights_key=datasets_weight_key if self.add_predictor_nodes_to_dataset else weight_key,
             mask_key=mask_key,
         )
 
-        pipeline += Product(dataset_weight_key, datasets_weight_key, weight_key)
+        if self.add_predictor_nodes_to_dataset:
+            pipeline += Product(dataset_weight_key, datasets_weight_key, weight_key)
 
         # Trainer attributes:
         if self.num_data_fetchers > 1:

diff --git a/dacapo/experiments/trainers/gunpowder_trainer_config.py b/dacapo/experiments/trainers/gunpowder_trainer_config.py
index ae4243059..8f5b7bd6d 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer_config.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer_config.py
@@ -29,3 +29,8 @@ class GunpowderTrainerConfig(TrainerConfig):
     )
     min_masked: Optional[float] = attr.ib(default=0.15)
     clip_raw: bool = attr.ib(default=True)
+
+    add_predictor_nodes_to_dataset: Optional[bool] = attr.ib(
+        default=True,
+        metadata={"help_text": "Whether to add a predictor node to dataset_source and apply product of weights"}
+    )
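
Worth noting is the weight bookkeeping the new flag controls. With add_predictor_nodes_to_dataset=True (the default), every dataset source gets its own DaCapoTargetFilter writing dataset_weight_key, a second global predictor node after RandomProvider writes datasets_weight_key, and the Product node multiplies the two into the weight_key consumed downstream; with False, the single global node writes weight_key directly and no Product node is added. A toy numpy sketch of the element-wise product (assumed behavior of the Product gunpowder node, which is not shown in this diff):

    import numpy as np

    # Per-dataset weights, e.g. masking/balancing computed per source.
    dataset_weight = np.array([1.0, 0.5, 0.0, 1.0])
    # Global weights from the predictor node applied after RandomProvider.
    datasets_weight = np.array([0.8, 0.8, 0.8, 0.8])

    # weight_key = dataset_weight_key * datasets_weight_key
    weight = dataset_weight * datasets_weight
    print(weight)  # [0.8 0.4 0.  0.8]
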
From 353b8cb9686c1b5c5cbdd3b06323615bcd132b71 Mon Sep 17 00:00:00 2001
From: davidackerman
Date: Fri, 9 Feb 2024 19:54:31 +0000
Subject: [PATCH 4/6] :art: Format Python code with psf/black

---
 dacapo/experiments/trainers/gunpowder_trainer.py        | 8 ++++++--
 dacapo/experiments/trainers/gunpowder_trainer_config.py | 4 +++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py
index ef5a6bf75..f5d8fcd52 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer.py
@@ -43,7 +43,9 @@ def __init__(self, trainer_config):
         self.clip_raw = trainer_config.clip_raw
 
         # Testing out if calculating multiple times and multiplying is necessary
-        self.add_predictor_nodes_to_dataset = trainer_config.add_predictor_nodes_to_dataset
+        self.add_predictor_nodes_to_dataset = (
+            trainer_config.add_predictor_nodes_to_dataset
+        )
 
         self.scheduler = None
@@ -166,7 +168,9 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None):
             task.predictor,
             gt_key=gt_key,
             target_key=target_key,
-            weights_key=datasets_weight_key if self.add_predictor_nodes_to_dataset else weight_key,
+            weights_key=datasets_weight_key
+            if self.add_predictor_nodes_to_dataset
+            else weight_key,
             mask_key=mask_key,
         )

diff --git a/dacapo/experiments/trainers/gunpowder_trainer_config.py b/dacapo/experiments/trainers/gunpowder_trainer_config.py
index 8f5b7bd6d..539e3c5e1 100644
--- a/dacapo/experiments/trainers/gunpowder_trainer_config.py
+++ b/dacapo/experiments/trainers/gunpowder_trainer_config.py
@@ -32,5 +32,7 @@ class GunpowderTrainerConfig(TrainerConfig):
 
     add_predictor_nodes_to_dataset: Optional[bool] = attr.ib(
         default=True,
-        metadata={"help_text": "Whether to add a predictor node to dataset_source and apply product of weights"}
+        metadata={
+            "help_text": "Whether to add a predictor node to dataset_source and apply product of weights"
+        },
     )

From e46acf0c4cfeda2af02d8a9285890e9ddedfbb66 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 22:14:54 +0000
Subject: [PATCH 5/6] :art: Format Python code with psf/black

---
 dacapo/apply.py   | 2 +-
 dacapo/cli.py     | 2 +-
 dacapo/predict.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dacapo/apply.py b/dacapo/apply.py
index 8ada300dd..434002ef6 100644
--- a/dacapo/apply.py
+++ b/dacapo/apply.py
@@ -10,4 +10,4 @@ def apply(run_name: str, iteration: int, dataset_name: str):
         iteration,
         dataset_name,
     )
-    raise NotImplementedError("This function is not yet implemented.")
\ No newline at end of file
+    raise NotImplementedError("This function is not yet implemented.")

diff --git a/dacapo/cli.py b/dacapo/cli.py
index 732e74117..be59df0c0 100644
--- a/dacapo/cli.py
+++ b/dacapo/cli.py
@@ -57,4 +57,4 @@ def validate(run_name, iteration):
     help="The name of the dataset to apply the run to.",
 )
 def apply(run_name, iteration, dataset_name):
-    dacapo.apply(run_name, iteration, dataset_name)
\ No newline at end of file
+    dacapo.apply(run_name, iteration, dataset_name)

diff --git a/dacapo/predict.py b/dacapo/predict.py
index afe137fcb..1df4d779e 100644
--- a/dacapo/predict.py
+++ b/dacapo/predict.py
@@ -24,7 +24,7 @@ def predict(
     num_cpu_workers: int = 4,
     compute_context: ComputeContext = LocalTorch(),
     output_roi: Optional[Roi] = None,
-    output_dtype: np.dtype = np.float32, # type: ignore
+    output_dtype: np.dtype = np.float32,  # type: ignore
     overwrite: bool = False,
 ):
     # get the model's input and output size

From 232047c75bcffa37760f40b739d6dcf346107859 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Fri, 9 Feb 2024 22:16:46 +0000
Subject: [PATCH 6/6] :art: Format Python code with psf/black

---
 dacapo/experiments/tasks/affinities_task.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dacapo/experiments/tasks/affinities_task.py b/dacapo/experiments/tasks/affinities_task.py
index 5f4ba82b3..859494e7e 100644
--- a/dacapo/experiments/tasks/affinities_task.py
+++ b/dacapo/experiments/tasks/affinities_task.py
@@ -14,6 +14,8 @@ def __init__(self, task_config):
         self.predictor = AffinitiesPredictor(
             neighborhood=task_config.neighborhood, lsds=task_config.lsds
         )
-        self.loss = AffinitiesLoss(len(task_config.neighborhood), task_config.lsds_to_affs_weight_ratio)
+        self.loss = AffinitiesLoss(
+            len(task_config.neighborhood), task_config.lsds_to_affs_weight_ratio
+        )
         self.post_processor = WatershedPostProcessor(offsets=task_config.neighborhood)
         self.evaluator = InstanceEvaluator()
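
Taken together, the series is driven from the task config. A hypothetical configuration exercising the new field; the name, the neighborhood value, and the import path are illustrative assumptions, while lsds, neighborhood, and lsds_to_affs_weight_ratio are the fields defined in the diffs above:

    # Assumes AffinitiesTaskConfig is re-exported from dacapo.experiments.tasks.
    from dacapo.experiments.tasks import AffinitiesTaskConfig

    task_config = AffinitiesTaskConfig(
        name="affs_with_lsds",
        neighborhood=[(0, 0, 1), (0, 1, 0), (1, 0, 0)],  # direct 3D neighbors
        lsds=True,
        # Count the auxiliary LSD loss at half the weight of the affinities.
        lsds_to_affs_weight_ratio=0.5,
    )
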