ISS refactored with new spot finding path

spacetx · Sep 26, 2019 · fb9b662 · fb9b662
1 parent c220e44
commit fb9b662
Show file tree

Hide file tree

Showing 17 changed files with 406 additions and 104 deletions.
diff --git a/docs/source/_static/data_processing_examples/iss_pipeline.py b/docs/source/_static/data_processing_examples/iss_pipeline.py
@@ -7,7 +7,7 @@
 
 import starfish
 from starfish.image import ApplyTransform, Filter, LearnTransform, Segment
-from starfish.spots import DetectSpots, AssignTargets
+from starfish.spots import FindSpots, DecodeSpots, AssignTargets
 from starfish.types import Axes
 
 test = os.getenv("TESTING") is not None
@@ -30,22 +30,23 @@ def iss_pipeline(fov, codebook):
     filt = Filter.WhiteTophat(masking_radius, is_volume=False)
     filtered = filt.run(registered, verbose=True, in_place=False)
 
-    # detect spots using laplacian of gaussians approach
-    p = DetectSpots.BlobDetector(
+    bd = FindSpots.BlobDetector(
         min_sigma=1,
         max_sigma=10,
         num_sigma=30,
         threshold=0.01,
         measurement_type='mean',
     )
 
-    intensities = p.run(
-        filtered,
-        blobs_image=fov.get_image('dots'),
-        blobs_axes=(Axes.ROUND, Axes.ZPLANE))
+    # detect spots using laplacian of gaussians approach
+    dots_max_projector = Filter.Reduce((Axes.ROUND, Axes.ZPLANE), func="max", module=Filter.Reduce.FunctionSource.np)
+    dots_max = dots_max_projector.run(fov.get_image('dots'))
+    # locate spots in a reference image
+    spots = bd.run(reference_image=dots_max, image_stack=filtered)
 
     # decode the pixel traces using the codebook
-    decoded = codebook.decode_per_round_max(intensities)
+    decoder = DecodeSpots.PerRoundMaxChannel(codebook=codebook)
+    decoded = decoder.run(spots=spots)
 
     # segment cells
     seg = Segment.Watershed(

diff --git a/notebooks/ISS.ipynb b/notebooks/ISS.ipynb
@@ -82,7 +82,7 @@
    "source": [
     "## Visualize Codebook\n",
     "\n",
-    "The ISS codebook maps each barcode to a gene.This protocol asserts that genes are encoded with a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword. The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword. The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'. "
+    "The ISS codebook maps each barcode to a gene.This protocol asserts that genes are encoded with a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword. The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword. The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'."
    ]
   },
   {
@@ -199,12 +199,12 @@
     "f, (ax1, ax2) = plt.subplots(ncols=2)\n",
     "vmin, vmax = np.percentile(single_plane.xarray.values.data, [5, 99])\n",
     "imshow_plane(\n",
-    "    single_plane, ax=ax1, vmin=vmin, vmax=vmax, \n",
+    "    single_plane, ax=ax1, vmin=vmin, vmax=vmax,\n",
     "    title=\"Original data\\nRound: 0, Channel: 0\"\n",
     ")\n",
     "vmin, vmax = np.percentile(single_plane_filtered.xarray.values.data, [5, 99])\n",
     "imshow_plane(\n",
-    "    single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax, \n",
+    "    single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax,\n",
     "    title=\"Filtered data\\nRound: 0, Channel: 0\"\n",
     ")"
    ]
@@ -220,7 +220,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain. "
+    "Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain."
    ]
   },
   {
@@ -252,61 +252,27 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from starfish.spots import DetectSpots\n",
     "import warnings\n",
+    "from starfish.spots import FindSpots, DecodeSpots\n",
     "\n",
-    "# parameters to define the allowable gaussian sizes (parameter space)\n",
-    "p = DetectSpots.BlobDetector(\n",
+    "bd = FindSpots.BlobDetector(\n",
     "    min_sigma=1,\n",
     "    max_sigma=10,\n",
     "    num_sigma=30,\n",
     "    threshold=0.01,\n",
     "    measurement_type='mean',\n",
     ")\n",
     "\n",
-    "# blobs = dots; define the spots in the dots image, but then find them again in the stack.\n",
-    "intensities = p.run(registered_imgs, blobs_image=dots, blobs_axes=(Axes.ROUND, Axes.ZPLANE))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The resulting \"Intensity Table\" table is the standardized file format for the output of a spot detector, and is the first output file format in the pipeline that is not an image or set of images"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "intensities"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "To decode the resulting intensity table, we simply match intensities to codewards in the codebook. This can be done by, for each round, selecting the color channel with the maximum intensity. This forms a potential quatenary code which serves as the key into a lookup in the codebook as to which gene this code corresponds to. Decoded genes are assigned to the ```target``` field in the decoded intensity table."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "decoded = experiment.codebook.decode_per_round_max(intensities)\n",
-    "decoded"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "dots_max_projector = Filter.Reduce((Axes.ROUND, Axes.ZPLANE), func=\"max\", module=Filter.Reduce.FunctionSource.np)\n",
+    "dots_max = dots_max_projector.run(dots)\n",
+    "# locate spots in a reference image\n",
+    "# spots = lp.run(reference_image=dots_max, image_stack=registered_imgs)\n",
+    "spots = bd.run(image_stack=registered_imgs, reference_image=dots_max)\n",
+    "\n",
+    "decoder = DecodeSpots.PerRoundMaxChannel(codebook=experiment.codebook)\n",
+    "decoded = decoder.run(spots=spots)\n",
+    "\n",
+    "# EPY: START code\n",
     "# Besides house keeping genes, VIM and HER2 should be most highly expessed, which is consistent here.\n",
     "genes, counts = np.unique(decoded.loc[decoded[Features.PASSES_THRESHOLDS]][Features.TARGET], return_counts=True)\n",
     "table = pd.Series(counts, index=genes).sort_values(ascending=False)\n",

diff --git a/notebooks/py/ISS.py b/notebooks/py/ISS.py
@@ -56,7 +56,7 @@
 # EPY: START markdown
 ### Visualize Codebook
 #
-#The ISS codebook maps each barcode to a gene.This protocol asserts that genes are encoded with a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword. The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword. The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'. 
+#The ISS codebook maps each barcode to a gene.This protocol asserts that genes are encoded with a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword. The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword. The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'.
 # EPY: END markdown
 
 # EPY: START code
@@ -131,12 +131,12 @@
 f, (ax1, ax2) = plt.subplots(ncols=2)
 vmin, vmax = np.percentile(single_plane.xarray.values.data, [5, 99])
 imshow_plane(
-    single_plane, ax=ax1, vmin=vmin, vmax=vmax, 
+    single_plane, ax=ax1, vmin=vmin, vmax=vmax,
     title="Original data\nRound: 0, Channel: 0"
 )
 vmin, vmax = np.percentile(single_plane_filtered.xarray.values.data, [5, 99])
 imshow_plane(
-    single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax, 
+    single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax,
     title="Filtered data\nRound: 0, Channel: 0"
 )
 # EPY: END code
@@ -146,7 +146,7 @@
 # EPY: END markdown
 
 # EPY: START markdown
-#Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain. 
+#Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain.
 # EPY: END markdown
 
 # EPY: START code
@@ -165,40 +165,27 @@
 # EPY: END markdown
 
 # EPY: START code
-from starfish.spots import DetectSpots
 import warnings
+from starfish.spots import FindSpots, DecodeSpots
 
-# parameters to define the allowable gaussian sizes (parameter space)
-p = DetectSpots.BlobDetector(
+bd = FindSpots.BlobDetector(
     min_sigma=1,
     max_sigma=10,
     num_sigma=30,
     threshold=0.01,
     measurement_type='mean',
 )
 
-# blobs = dots; define the spots in the dots image, but then find them again in the stack.
-intensities = p.run(registered_imgs, blobs_image=dots, blobs_axes=(Axes.ROUND, Axes.ZPLANE))
-# EPY: END code
+dots_max_projector = Filter.Reduce((Axes.ROUND, Axes.ZPLANE), func="max", module=Filter.Reduce.FunctionSource.np)
+dots_max = dots_max_projector.run(dots)
+# locate spots in a reference image
+# spots = lp.run(reference_image=dots_max, image_stack=registered_imgs)
+spots = bd.run(image_stack=registered_imgs, reference_image=dots_max)
 
-# EPY: START markdown
-#The resulting "Intensity Table" table is the standardized file format for the output of a spot detector, and is the first output file format in the pipeline that is not an image or set of images
-# EPY: END markdown
+decoder = DecodeSpots.PerRoundMaxChannel(codebook=experiment.codebook)
+decoded = decoder.run(spots=spots)
 
-# EPY: START code
-intensities
-# EPY: END code
-
-# EPY: START markdown
-#To decode the resulting intensity table, we simply match intensities to codewards in the codebook. This can be done by, for each round, selecting the color channel with the maximum intensity. This forms a potential quatenary code which serves as the key into a lookup in the codebook as to which gene this code corresponds to. Decoded genes are assigned to the ```target``` field in the decoded intensity table.
-# EPY: END markdown
-
-# EPY: START code
-decoded = experiment.codebook.decode_per_round_max(intensities)
-decoded
-# EPY: END code
-
-# EPY: START code
+# EPY: ESCAPE # EPY: START code
 # Besides house keeping genes, VIM and HER2 should be most highly expessed, which is consistent here.
 genes, counts = np.unique(decoded.loc[decoded[Features.PASSES_THRESHOLDS]][Features.TARGET], return_counts=True)
 table = pd.Series(counts, index=genes).sort_values(ascending=False)

diff --git a/starfish/core/expression_matrix/test/test_serialization.py b/starfish/core/expression_matrix/test/test_serialization.py
@@ -11,7 +11,7 @@ def test_save_expression_matrix():
 
     codebook = codebook_array_factory()
 
-    decoded_intensities = factories.synthetic_decoded_intenisty_table(
+    decoded_intensities = factories.synthetic_decoded_intensity_table(
         codebook,
         num_z=3,
         height=100,

diff --git a/starfish/core/intensity_table/test/factories.py b/starfish/core/intensity_table/test/factories.py
@@ -11,7 +11,7 @@ def synthetic_intensity_table() -> IntensityTable:
     return IntensityTable.synthetic_intensities(loaded_codebook(), n_spots=2)
 
 
-def synthetic_decoded_intenisty_table(
+def synthetic_decoded_intensity_table(
         codebook,
         num_z: int = 12,
         height: int = 50,

diff --git a/starfish/core/intensity_table/test/test_intensity_table_coords.py b/starfish/core/intensity_table/test/test_intensity_table_coords.py
@@ -8,7 +8,7 @@
 from starfish.core.codebook.test.factories import codebook_array_factory
 from starfish.core.imagestack.test.factories import imagestack_with_coords_factory
 from starfish.core.types import Axes, Coordinates, Features, PhysicalCoordinateTypes
-from .factories import synthetic_decoded_intenisty_table
+from .factories import synthetic_decoded_intensity_table
 from ..intensity_table import IntensityTable
 from ..intensity_table_coordinates import (
     transfer_physical_coords_to_intensity_table,
@@ -93,7 +93,7 @@ def test_tranfering_physical_coords_to_expression_matrix():
     stack = imagestack_with_coords_factory(stack_shape, physical_coords)
     codebook = codebook_array_factory()
 
-    decoded_intensities = synthetic_decoded_intenisty_table(
+    decoded_intensities = synthetic_decoded_intensity_table(
         codebook,
         num_z=stack_shape[Axes.ZPLANE],
         height=stack_shape[Axes.Y],

diff --git a/starfish/core/spots/DecodeSpots/__init__.py b/starfish/core/spots/DecodeSpots/__init__.py
@@ -1,5 +1,5 @@
 from ._base import DecodeSpotsAlgorithm
-
+from .per_round_max_channel_decoder import PerRoundMaxChannel
 
 # autodoc's automodule directive only captures the modules explicitly listed in __all__.
 all_filters = {

diff --git a/starfish/core/spots/DecodeSpots/per_round_max_channel_decoder.py b/starfish/core/spots/DecodeSpots/per_round_max_channel_decoder.py
@@ -0,0 +1,45 @@
+from starfish.core.codebook.codebook import Codebook
+from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable
+from starfish.core.intensity_table.intensity_table_coordinates import \
+    transfer_physical_coords_to_intensity_table
+from starfish.core.spots.DecodeSpots.trace_builders import build_spot_traces_exact_match
+from starfish.core.types import SpotFindingResults
+from ._base import DecodeSpotsAlgorithm
+
+
+class PerRoundMaxChannel(DecodeSpotsAlgorithm):
+    """
+    Decode spots by selecting the max-valued channel in each sequencing round.
+
+    Note that this assumes that the codebook contains only one "on" channel per sequencing round,
+    a common pattern in experiments that assign one fluorophore to each DNA nucleotide and
+    read DNA sequentially. It is also a characteristic of single-molecule FISH and RNAscope
+    codebooks.
+
+    Parameters
+    ----------
+    codebook : Codebook
+        Contains codes to decode IntensityTable
+
+    """
+
+    def __init__(self, codebook: Codebook):
+        self.codebook = codebook
+
+    def run(self, spots: SpotFindingResults, *args) -> DecodedIntensityTable:
+        """Decode spots by selecting the max-valued channel in each sequencing round
+
+        Parameters
+        ----------
+        spots: SpotFindingResults
+            A Dict of tile indices and their corresponding measured spots
+
+        Returns
+        -------
+        DecodedIntensityTable :
+            IntensityTable decoded and appended with Features.TARGET and Features.QUALITY values.
+
+        """
+        intensities = build_spot_traces_exact_match(spots)
+        transfer_physical_coords_to_intensity_table(intensity_table=intensities, spots=spots)
+        return self.codebook.decode_per_round_max(intensities)
diff --git a/starfish/core/spots/DecodeSpots/trace_builders.py b/starfish/core/spots/DecodeSpots/trace_builders.py
@@ -0,0 +1,27 @@
+from starfish.core.intensity_table.intensity_table import IntensityTable
+from starfish.core.types import Axes, Features, SpotFindingResults
+
+
+def build_spot_traces_exact_match(spot_results: SpotFindingResults):
+    """
+    Combines spots found in matching x/y positions across rounds and channels of
+    an ImageStack into traces represented as an IntensityTable.
+
+    Parameters
+    -----------
+    spot_results: SpotFindingResults
+        Spots found accrss rounds/channels of an ImageStack
+    """
+    # create IntensityTable with same x/y/z info accross all r/ch
+    spot_attributes = spot_results[{Axes.ROUND: 0, Axes.CH: 0}]
+    ch_labels = spot_results.ch_labels
+    round_labels = spot_results.round_labels
+    intensity_table = IntensityTable.zeros(
+        spot_attributes=spot_attributes,
+        ch_labels=ch_labels,
+        round_labels=round_labels,
+    )
+    for r, c in spot_results.keys():
+        intensity_table.loc[dict(c=c, r=r)] = \
+            spot_results[{Axes.ROUND: r, Axes.CH: c}].data[Features.INTENSITY]
+    return intensity_table
diff --git a/starfish/core/spots/DetectSpots/blob.py b/starfish/core/spots/DetectSpots/blob.py
@@ -147,7 +147,10 @@ def run(
         n_processes : Optional[int] = None,
             Number of processes to devote to spot finding.
         """
-
+        DeprecationWarning("Starfish is embarking on a SpotFinding data structures refactor"
+                           "(See https://github.com/spacetx/starfish/issues/1514) This version of "
+                           "BlobDetection will soon be deleted. To find and decode your spots "
+                           "please instead use FindSpots.BlobDetector then DecodeSpots.")
         intensity_table = detect_spots(
             data_stack=primary_image,
             spot_finding_method=self.image_to_spots,

diff --git a/starfish/core/spots/DetectSpots/test/test_synthetic_data.py b/starfish/core/spots/DetectSpots/test/test_synthetic_data.py
@@ -88,5 +88,4 @@ def test_medium_synthetic_stack():
     calculated_intensities = codebook.decode_metric(
         calculated_intensities, max_distance=1, min_intensity=0, norm_order=2
     )
-
     assert len(calculated_intensities.coords[Features.TARGET]) == 80
diff --git a/starfish/core/spots/FindSpots/__init__.py b/starfish/core/spots/FindSpots/__init__.py
@@ -1,5 +1,5 @@
 from ._base import FindSpotsAlgorithm
-
+from .blob import BlobDetector
 
 # autodoc's automodule directive only captures the modules explicitly listed in __all__.
 all_filters = {

diff --git a/starfish/core/spots/FindSpots/_base.py b/starfish/core/spots/FindSpots/_base.py
@@ -1,7 +1,8 @@
 from abc import abstractmethod
-from typing import Callable, Optional, Sequence
+from typing import Callable, Optional, Union
 
 import numpy as np
+import xarray as xr
 
 from starfish.core.imagestack.imagestack import ImageStack
 from starfish.core.pipeline.algorithmbase import AlgorithmBase
@@ -46,7 +47,9 @@ def run(self, image_stack: ImageStack,
         raise NotImplementedError()
 
     @staticmethod
-    def _get_measurement_function(measurement_type: str) -> Callable[[Sequence], Number]:
+    def _get_measurement_function(
+            measurement_type: str
+    ) -> Callable[[Union[np.ndarray, xr.DataArray]], Number]:
         try:
             measurement_function = getattr(np, measurement_type)
         except AttributeError: