Skip to content

Commit

Permalink
ISS refactored with new spot finding path
Browse files Browse the repository at this point in the history
  • Loading branch information
Shannon Axelrod committed Oct 8, 2019
1 parent 505f73c commit cf170a5
Show file tree
Hide file tree
Showing 17 changed files with 415 additions and 103 deletions.
17 changes: 9 additions & 8 deletions docs/source/_static/data_processing_examples/iss_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import starfish
from starfish.image import ApplyTransform, Filter, LearnTransform, Segment
from starfish.spots import DetectSpots, AssignTargets
from starfish.spots import FindSpots, DecodeSpots, AssignTargets
from starfish.types import Axes

test = os.getenv("TESTING") is not None
Expand All @@ -30,22 +30,23 @@ def iss_pipeline(fov, codebook):
filt = Filter.WhiteTophat(masking_radius, is_volume=False)
filtered = filt.run(registered, verbose=True, in_place=False)

# detect spots using laplacian of gaussians approach
p = DetectSpots.BlobDetector(
bd = FindSpots.BlobDetector(
min_sigma=1,
max_sigma=10,
num_sigma=30,
threshold=0.01,
measurement_type='mean',
)

intensities = p.run(
filtered,
blobs_image=fov.get_image('dots'),
blobs_axes=(Axes.ROUND, Axes.ZPLANE))
# detect spots using laplacian of gaussians approach
dots_max_projector = Filter.Reduce((Axes.ROUND, Axes.ZPLANE), func="max", module=Filter.Reduce.FunctionSource.np)
dots_max = dots_max_projector.run(fov.get_image('dots'))
# locate spots in a reference image
spots = bd.run(reference_image=dots_max, image_stack=filtered)

# decode the pixel traces using the codebook
decoded = codebook.decode_per_round_max(intensities)
decoder = DecodeSpots.PerRoundMaxChannel(codebook=codebook)
decoded = decoder.run(spots=spots)

# segment cells
seg = Segment.Watershed(
Expand Down
66 changes: 16 additions & 50 deletions notebooks/ISS.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@
"source": [
"## Visualize Codebook\n",
"\n",
"The ISS codebook maps each barcode to a gene.This protocol asserts that genes are encoded with a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword. The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword. The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'. "
" The ISS codebook maps each barcode to a gene. This protocol asserts that genes are encoded with\n",
" a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword.\n",
" The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword.\n",
" The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'."
]
},
{
Expand Down Expand Up @@ -199,12 +202,12 @@
"f, (ax1, ax2) = plt.subplots(ncols=2)\n",
"vmin, vmax = np.percentile(single_plane.xarray.values.data, [5, 99])\n",
"imshow_plane(\n",
" single_plane, ax=ax1, vmin=vmin, vmax=vmax, \n",
" single_plane, ax=ax1, vmin=vmin, vmax=vmax,\n",
" title=\"Original data\\nRound: 0, Channel: 0\"\n",
")\n",
"vmin, vmax = np.percentile(single_plane_filtered.xarray.values.data, [5, 99])\n",
"imshow_plane(\n",
" single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax, \n",
" single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax,\n",
" title=\"Filtered data\\nRound: 0, Channel: 0\"\n",
")"
]
Expand All @@ -220,7 +223,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain. "
"Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain."
]
},
{
Expand Down Expand Up @@ -252,61 +255,24 @@
"metadata": {},
"outputs": [],
"source": [
"from starfish.spots import DetectSpots\n",
"import warnings\n",
"from starfish.spots import FindSpots, DecodeSpots\n",
"\n",
"# parameters to define the allowable gaussian sizes (parameter space)\n",
"p = DetectSpots.BlobDetector(\n",
"bd = FindSpots.BlobDetector(\n",
" min_sigma=1,\n",
" max_sigma=10,\n",
" num_sigma=30,\n",
" threshold=0.01,\n",
" measurement_type='mean',\n",
")\n",
"\n",
"# blobs = dots; define the spots in the dots image, but then find them again in the stack.\n",
"intensities = p.run(registered_imgs, blobs_image=dots, blobs_axes=(Axes.ROUND, Axes.ZPLANE))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The resulting \"Intensity Table\" table is the standardized file format for the output of a spot detector, and is the first output file format in the pipeline that is not an image or set of images"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"intensities"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To decode the resulting intensity table, we simply match intensities to codewards in the codebook. This can be done by, for each round, selecting the color channel with the maximum intensity. This forms a potential quatenary code which serves as the key into a lookup in the codebook as to which gene this code corresponds to. Decoded genes are assigned to the ```target``` field in the decoded intensity table."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"decoded = experiment.codebook.decode_per_round_max(intensities)\n",
"decoded"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dots_max_projector = Filter.Reduce((Axes.ROUND, Axes.ZPLANE), func=\"max\", module=Filter.Reduce.FunctionSource.np)\n",
"dots_max = dots_max_projector.run(dots)\n",
"spots = bd.run(image_stack=registered_imgs, reference_image=dots_max)\n",
"\n",
"decoder = DecodeSpots.PerRoundMaxChannel(codebook=experiment.codebook)\n",
"decoded = decoder.run(spots=spots)\n",
"\n",
"# Besides house keeping genes, VIM and HER2 should be most highly expessed, which is consistent here.\n",
"genes, counts = np.unique(decoded.loc[decoded[Features.PASSES_THRESHOLDS]][Features.TARGET], return_counts=True)\n",
"table = pd.Series(counts, index=genes).sort_values(ascending=False)\n",
Expand Down
41 changes: 14 additions & 27 deletions notebooks/py/ISS.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@
# EPY: START markdown
### Visualize Codebook
#
#The ISS codebook maps each barcode to a gene.This protocol asserts that genes are encoded with a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword. The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword. The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'.
# The ISS codebook maps each barcode to a gene. This protocol asserts that genes are encoded with
# a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword.
# The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword.
# The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'.
# EPY: END markdown

# EPY: START code
Expand Down Expand Up @@ -131,12 +134,12 @@
f, (ax1, ax2) = plt.subplots(ncols=2)
vmin, vmax = np.percentile(single_plane.xarray.values.data, [5, 99])
imshow_plane(
single_plane, ax=ax1, vmin=vmin, vmax=vmax,
single_plane, ax=ax1, vmin=vmin, vmax=vmax,
title="Original data\nRound: 0, Channel: 0"
)
vmin, vmax = np.percentile(single_plane_filtered.xarray.values.data, [5, 99])
imshow_plane(
single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax,
single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax,
title="Filtered data\nRound: 0, Channel: 0"
)
# EPY: END code
Expand All @@ -146,7 +149,7 @@
# EPY: END markdown

# EPY: START markdown
#Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain.
#Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain.
# EPY: END markdown

# EPY: START code
Expand All @@ -165,40 +168,24 @@
# EPY: END markdown

# EPY: START code
from starfish.spots import DetectSpots
import warnings
from starfish.spots import FindSpots, DecodeSpots

# parameters to define the allowable gaussian sizes (parameter space)
p = DetectSpots.BlobDetector(
bd = FindSpots.BlobDetector(
min_sigma=1,
max_sigma=10,
num_sigma=30,
threshold=0.01,
measurement_type='mean',
)

# blobs = dots; define the spots in the dots image, but then find them again in the stack.
intensities = p.run(registered_imgs, blobs_image=dots, blobs_axes=(Axes.ROUND, Axes.ZPLANE))
# EPY: END code
dots_max_projector = Filter.Reduce((Axes.ROUND, Axes.ZPLANE), func="max", module=Filter.Reduce.FunctionSource.np)
dots_max = dots_max_projector.run(dots)
spots = bd.run(image_stack=registered_imgs, reference_image=dots_max)

# EPY: START markdown
#The resulting "Intensity Table" table is the standardized file format for the output of a spot detector, and is the first output file format in the pipeline that is not an image or set of images
# EPY: END markdown
decoder = DecodeSpots.PerRoundMaxChannel(codebook=experiment.codebook)
decoded = decoder.run(spots=spots)

# EPY: START code
intensities
# EPY: END code

# EPY: START markdown
#To decode the resulting intensity table, we simply match intensities to codewards in the codebook. This can be done by, for each round, selecting the color channel with the maximum intensity. This forms a potential quatenary code which serves as the key into a lookup in the codebook as to which gene this code corresponds to. Decoded genes are assigned to the ```target``` field in the decoded intensity table.
# EPY: END markdown

# EPY: START code
decoded = experiment.codebook.decode_per_round_max(intensities)
decoded
# EPY: END code

# EPY: START code
# Besides house keeping genes, VIM and HER2 should be most highly expessed, which is consistent here.
genes, counts = np.unique(decoded.loc[decoded[Features.PASSES_THRESHOLDS]][Features.TARGET], return_counts=True)
table = pd.Series(counts, index=genes).sort_values(ascending=False)
Expand Down
2 changes: 1 addition & 1 deletion starfish/core/expression_matrix/test/test_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_save_expression_matrix():

codebook = codebook_array_factory()

decoded_intensities = factories.synthetic_decoded_intenisty_table(
decoded_intensities = factories.synthetic_decoded_intensity_table(
codebook,
num_z=3,
height=100,
Expand Down
2 changes: 1 addition & 1 deletion starfish/core/intensity_table/test/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def synthetic_intensity_table() -> IntensityTable:
return IntensityTable.synthetic_intensities(loaded_codebook(), n_spots=2)


def synthetic_decoded_intenisty_table(
def synthetic_decoded_intensity_table(
codebook,
num_z: int = 12,
height: int = 50,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from starfish.core.codebook.test.factories import codebook_array_factory
from starfish.core.imagestack.test.factories import imagestack_with_coords_factory
from starfish.core.types import Axes, Coordinates, Features, PhysicalCoordinateTypes
from .factories import synthetic_decoded_intenisty_table
from .factories import synthetic_decoded_intensity_table
from ..intensity_table import IntensityTable
from ..intensity_table_coordinates import (
transfer_physical_coords_to_intensity_table,
Expand Down Expand Up @@ -93,7 +93,7 @@ def test_tranfering_physical_coords_to_expression_matrix():
stack = imagestack_with_coords_factory(stack_shape, physical_coords)
codebook = codebook_array_factory()

decoded_intensities = synthetic_decoded_intenisty_table(
decoded_intensities = synthetic_decoded_intensity_table(
codebook,
num_z=stack_shape[Axes.ZPLANE],
height=stack_shape[Axes.Y],
Expand Down
2 changes: 1 addition & 1 deletion starfish/core/spots/DecodeSpots/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._base import DecodeSpotsAlgorithm

from .per_round_max_channel_decoder import PerRoundMaxChannel

# autodoc's automodule directive only captures the modules explicitly listed in __all__.
all_filters = {
Expand Down
45 changes: 45 additions & 0 deletions starfish/core/spots/DecodeSpots/per_round_max_channel_decoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from starfish.core.codebook.codebook import Codebook
from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable
from starfish.core.intensity_table.intensity_table_coordinates import \
transfer_physical_coords_to_intensity_table
from starfish.core.spots.DecodeSpots.trace_builders import build_spot_traces_exact_match
from starfish.core.types import SpotFindingResults
from ._base import DecodeSpotsAlgorithm


class PerRoundMaxChannel(DecodeSpotsAlgorithm):
"""
Decode spots by selecting the max-valued channel in each sequencing round.
Note that this assumes that the codebook contains only one "on" channel per sequencing round,
a common pattern in experiments that assign one fluorophore to each DNA nucleotide and
read DNA sequentially. It is also a characteristic of single-molecule FISH and RNAscope
codebooks.
Parameters
----------
codebook : Codebook
Contains codes to decode IntensityTable
"""

def __init__(self, codebook: Codebook):
self.codebook = codebook

def run(self, spots: SpotFindingResults, *args) -> DecodedIntensityTable:
"""Decode spots by selecting the max-valued channel in each sequencing round
Parameters
----------
spots: SpotFindingResults
A Dict of tile indices and their corresponding measured spots
Returns
-------
DecodedIntensityTable :
IntensityTable decoded and appended with Features.TARGET and Features.QUALITY values.
"""
intensities = build_spot_traces_exact_match(spots)
transfer_physical_coords_to_intensity_table(intensity_table=intensities, spots=spots)
return self.codebook.decode_per_round_max(intensities)
27 changes: 27 additions & 0 deletions starfish/core/spots/DecodeSpots/trace_builders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from starfish.core.intensity_table.intensity_table import IntensityTable
from starfish.core.types import Axes, Features, SpotFindingResults


def build_spot_traces_exact_match(spot_results: SpotFindingResults) -> IntensityTable:
"""
Combines spots found in matching x/y positions across rounds and channels of
an ImageStack into traces represented as an IntensityTable.
Parameters
-----------
spot_results: SpotFindingResults
Spots found accrss rounds/channels of an ImageStack
"""
# create IntensityTable with same x/y/z info accross all r/ch
spot_attributes = list(spot_results.values())[0]
intensity_table = IntensityTable.zeros(
spot_attributes=spot_attributes,
round_labels=spot_results.round_labels,
ch_labels=spot_results.ch_labels,
)
for r, c in spot_results.keys():
value = spot_results[{Axes.ROUND: r, Axes.CH: c}].data[Features.INTENSITY]
# if no exact match set value to 0
value = 0 if value.empty else value
intensity_table.loc[dict(c=c, r=r)] = value
return intensity_table
5 changes: 4 additions & 1 deletion starfish/core/spots/DetectSpots/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,10 @@ def run(
n_processes : Optional[int] = None,
Number of processes to devote to spot finding.
"""

DeprecationWarning("Starfish is embarking on a SpotFinding data structures refactor"
"(See https://github.com/spacetx/starfish/issues/1514) This version of "
"BlobDetection will soon be deleted. To find and decode your spots "
"please instead use FindSpots.BlobDetector followed by DecodeSpots.")
intensity_table = detect_spots(
data_stack=primary_image,
spot_finding_method=self.image_to_spots,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,4 @@ def test_medium_synthetic_stack():
calculated_intensities = codebook.decode_metric(
calculated_intensities, max_distance=1, min_intensity=0, norm_order=2
)

assert len(calculated_intensities.coords[Features.TARGET]) == 80
2 changes: 1 addition & 1 deletion starfish/core/spots/FindSpots/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._base import FindSpotsAlgorithm

from .blob import BlobDetector

# autodoc's automodule directive only captures the modules explicitly listed in __all__.
all_filters = {
Expand Down
7 changes: 5 additions & 2 deletions starfish/core/spots/FindSpots/_base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from abc import abstractmethod
from typing import Callable, Optional, Sequence
from typing import Callable, Optional, Union

import numpy as np
import xarray as xr

from starfish.core.imagestack.imagestack import ImageStack
from starfish.core.pipeline.algorithmbase import AlgorithmBase
Expand Down Expand Up @@ -46,7 +47,9 @@ def run(self, image_stack: ImageStack,
raise NotImplementedError()

@staticmethod
def _get_measurement_function(measurement_type: str) -> Callable[[Sequence], Number]:
def _get_measurement_function(
measurement_type: str
) -> Callable[[Union[np.ndarray, xr.DataArray]], Number]:
try:
measurement_function = getattr(np, measurement_type)
except AttributeError:
Expand Down
Loading

0 comments on commit cf170a5

Please sign in to comment.