2) ISS spot finding refactor #1518

Merged · 1 commit · Oct 9, 2019
17 changes: 9 additions & 8 deletions docs/source/_static/data_processing_examples/iss_pipeline.py
@@ -7,7 +7,7 @@

import starfish
from starfish.image import ApplyTransform, Filter, LearnTransform, Segment
from starfish.spots import DetectSpots, AssignTargets
from starfish.spots import FindSpots, DecodeSpots, AssignTargets
from starfish.types import Axes

test = os.getenv("TESTING") is not None
@@ -30,22 +30,23 @@ def iss_pipeline(fov, codebook):
filt = Filter.WhiteTophat(masking_radius, is_volume=False)
filtered = filt.run(registered, verbose=True, in_place=False)

# detect spots using laplacian of gaussians approach
p = DetectSpots.BlobDetector(
bd = FindSpots.BlobDetector(
min_sigma=1,
max_sigma=10,
num_sigma=30,
threshold=0.01,
measurement_type='mean',
)

intensities = p.run(
filtered,
blobs_image=fov.get_image('dots'),
blobs_axes=(Axes.ROUND, Axes.ZPLANE))
# detect spots using laplacian of gaussians approach
dots_max_projector = Filter.Reduce((Axes.ROUND, Axes.ZPLANE), func="max", module=Filter.Reduce.FunctionSource.np)
dots_max = dots_max_projector.run(fov.get_image('dots'))
# locate spots in a reference image
spots = bd.run(reference_image=dots_max, image_stack=filtered)

# decode the pixel traces using the codebook
decoded = codebook.decode_per_round_max(intensities)
decoder = DecodeSpots.PerRoundMaxChannel(codebook=codebook)
decoded = decoder.run(spots=spots)

# segment cells
seg = Segment.Watershed(
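For readers skimming the truncated diff above, the old and new call patterns in this file condense to the sketch below. This is only a restatement of the changes shown, not additional API, and it assumes the same `fov`, `filtered`, and `codebook` objects defined earlier in the script.

```python
from starfish.image import Filter
from starfish.spots import FindSpots, DecodeSpots
from starfish.types import Axes

# Before: one DetectSpots call located spots against the dots image and measured them in the stack:
#   intensities = DetectSpots.BlobDetector(...).run(
#       filtered, blobs_image=fov.get_image('dots'), blobs_axes=(Axes.ROUND, Axes.ZPLANE))
#   decoded = codebook.decode_per_round_max(intensities)

# After: max-project the dots image, find spots against it, then decode in a separate step.
dots_max = Filter.Reduce(
    (Axes.ROUND, Axes.ZPLANE), func="max", module=Filter.Reduce.FunctionSource.np
).run(fov.get_image('dots'))
spots = FindSpots.BlobDetector(
    min_sigma=1, max_sigma=10, num_sigma=30, threshold=0.01, measurement_type='mean',
).run(reference_image=dots_max, image_stack=filtered)
decoded = DecodeSpots.PerRoundMaxChannel(codebook=codebook).run(spots=spots)
```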
66 changes: 16 additions & 50 deletions notebooks/ISS.ipynb
@@ -82,7 +82,10 @@
"source": [
"## Visualize Codebook\n",
"\n",
"The ISS codebook maps each barcode to a gene.This protocol asserts that genes are encoded with a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword. The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword. The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'. "
" The ISS codebook maps each barcode to a gene. This protocol asserts that genes are encoded with\n",
" a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword.\n",
" The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword.\n",
" The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'."
]
},
{
@@ -199,12 +202,12 @@
"f, (ax1, ax2) = plt.subplots(ncols=2)\n",
"vmin, vmax = np.percentile(single_plane.xarray.values.data, [5, 99])\n",
"imshow_plane(\n",
" single_plane, ax=ax1, vmin=vmin, vmax=vmax, \n",
" single_plane, ax=ax1, vmin=vmin, vmax=vmax,\n",
" title=\"Original data\\nRound: 0, Channel: 0\"\n",
")\n",
"vmin, vmax = np.percentile(single_plane_filtered.xarray.values.data, [5, 99])\n",
"imshow_plane(\n",
" single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax, \n",
" single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax,\n",
" title=\"Filtered data\\nRound: 0, Channel: 0\"\n",
")"
]
@@ -220,7 +223,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain. "
"Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain."
]
},
{
@@ -252,61 +255,24 @@
"metadata": {},
"outputs": [],
"source": [
"from starfish.spots import DetectSpots\n",
"import warnings\n",
"from starfish.spots import FindSpots, DecodeSpots\n",
"\n",
"# parameters to define the allowable gaussian sizes (parameter space)\n",
"p = DetectSpots.BlobDetector(\n",
"bd = FindSpots.BlobDetector(\n",
" min_sigma=1,\n",
" max_sigma=10,\n",
" num_sigma=30,\n",
" threshold=0.01,\n",
" measurement_type='mean',\n",
")\n",
"\n",
"# blobs = dots; define the spots in the dots image, but then find them again in the stack.\n",
"intensities = p.run(registered_imgs, blobs_image=dots, blobs_axes=(Axes.ROUND, Axes.ZPLANE))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The resulting \"Intensity Table\" table is the standardized file format for the output of a spot detector, and is the first output file format in the pipeline that is not an image or set of images"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"intensities"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To decode the resulting intensity table, we simply match intensities to codewards in the codebook. This can be done by, for each round, selecting the color channel with the maximum intensity. This forms a potential quatenary code which serves as the key into a lookup in the codebook as to which gene this code corresponds to. Decoded genes are assigned to the ```target``` field in the decoded intensity table."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"decoded = experiment.codebook.decode_per_round_max(intensities)\n",
"decoded"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dots_max_projector = Filter.Reduce((Axes.ROUND, Axes.ZPLANE), func=\"max\", module=Filter.Reduce.FunctionSource.np)\n",
"dots_max = dots_max_projector.run(dots)\n",
"spots = bd.run(image_stack=registered_imgs, reference_image=dots_max)\n",
"\n",
"decoder = DecodeSpots.PerRoundMaxChannel(codebook=experiment.codebook)\n",
"decoded = decoder.run(spots=spots)\n",
"\n",
"# Besides house keeping genes, VIM and HER2 should be most highly expessed, which is consistent here.\n",
"genes, counts = np.unique(decoded.loc[decoded[Features.PASSES_THRESHOLDS]][Features.TARGET], return_counts=True)\n",
"table = pd.Series(counts, index=genes).sort_values(ascending=False)\n",
41 changes: 14 additions & 27 deletions notebooks/py/ISS.py
@@ -56,7 +56,10 @@
# EPY: START markdown
### Visualize Codebook
#
#The ISS codebook maps each barcode to a gene. This protocol asserts that genes are encoded with a length 4 quaternary barcode that can be read out from the images. Each round encodes a position in the codeword. The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword. The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'.
# The ISS codebook maps each barcode to a gene. This protocol asserts that genes are encoded with
# a length 4 quaternary barcode that can be read out from the images. Each round encodes a position in the codeword.
# The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword.
# The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'.
# EPY: END markdown
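As a concrete illustration of the barcode-to-letter convention described above, the sketch below renders a per-round channel assignment as a four-letter barcode. The example codeword is hypothetical; only the 'T', 'G', 'C', 'A' channel order comes from the text.

```python
# Channels 0..3 correspond, in order, to the letters 'T', 'G', 'C', 'A' (per the description above).
CHANNEL_LETTERS = ['T', 'G', 'C', 'A']

# A hypothetical codeword: the "on" channel for each of the 4 rounds.
on_channel_per_round = [3, 0, 2, 1]

# Each round contributes one letter, giving a length-4 quaternary barcode.
barcode = ''.join(CHANNEL_LETTERS[ch] for ch in on_channel_per_round)
print(barcode)  # -> 'ATCG'
```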

# EPY: START code
@@ -131,12 +134,12 @@
f, (ax1, ax2) = plt.subplots(ncols=2)
vmin, vmax = np.percentile(single_plane.xarray.values.data, [5, 99])
imshow_plane(
single_plane, ax=ax1, vmin=vmin, vmax=vmax,
single_plane, ax=ax1, vmin=vmin, vmax=vmax,
title="Original data\nRound: 0, Channel: 0"
)
vmin, vmax = np.percentile(single_plane_filtered.xarray.values.data, [5, 99])
imshow_plane(
single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax,
single_plane_filtered, ax=ax2, vmin=vmin, vmax=vmax,
title="Filtered data\nRound: 0, Channel: 0"
)
# EPY: END code
@@ -146,7 +149,7 @@
# EPY: END markdown

# EPY: START markdown
#Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain.
#Images may have shifted between imaging rounds. This needs to be corrected for before decoding, since this shift in the images will corrupt the barcodes, thus hindering decoding accuracy. A simple procedure can correct for this shift. For each imaging round, the max projection across color channels should look like the dots stain. Below, we simply shift all images in each round to match the dots stain by learning the shift that maximizes the cross-correlation between the images and the dots stain.
# EPY: END markdown
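The shift-learning step described above can be illustrated outside of starfish with scikit-image's phase cross-correlation. This is only a sketch of the idea, assuming `scikit-image` and `scipy` are available; the pipeline itself uses starfish's `LearnTransform` and `ApplyTransform` for this step.

```python
import numpy as np
from scipy.ndimage import shift as nd_shift
from skimage.registration import phase_cross_correlation

# Two hypothetical 2D planes: a reference (e.g. the dots stain) and a translated round projection.
reference = np.random.rand(128, 128)
moving = nd_shift(reference, shift=(3.0, -2.0))

# Learn the translation that maximizes the cross-correlation with the reference image...
estimated_shift, error, _ = phase_cross_correlation(reference, moving, upsample_factor=10)

# ...and apply that shift to register the moving image onto the reference.
registered = nd_shift(moving, shift=estimated_shift)
print(estimated_shift)  # approximately (-3.0, 2.0)
```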

# EPY: START code
@@ -165,40 +168,24 @@
# EPY: END markdown

# EPY: START code
from starfish.spots import DetectSpots
import warnings
from starfish.spots import FindSpots, DecodeSpots

# parameters to define the allowable gaussian sizes (parameter space)
p = DetectSpots.BlobDetector(
bd = FindSpots.BlobDetector(
min_sigma=1,
max_sigma=10,
num_sigma=30,
threshold=0.01,
measurement_type='mean',
)

# blobs = dots; define the spots in the dots image, but then find them again in the stack.
intensities = p.run(registered_imgs, blobs_image=dots, blobs_axes=(Axes.ROUND, Axes.ZPLANE))
# EPY: END code
dots_max_projector = Filter.Reduce((Axes.ROUND, Axes.ZPLANE), func="max", module=Filter.Reduce.FunctionSource.np)
dots_max = dots_max_projector.run(dots)
spots = bd.run(image_stack=registered_imgs, reference_image=dots_max)

# EPY: START markdown
#The resulting "Intensity Table" table is the standardized file format for the output of a spot detector, and is the first output file format in the pipeline that is not an image or set of images
# EPY: END markdown
decoder = DecodeSpots.PerRoundMaxChannel(codebook=experiment.codebook)
decoded = decoder.run(spots=spots)

# EPY: START code
intensities
# EPY: END code

# EPY: START markdown
#To decode the resulting intensity table, we simply match intensities to codewords in the codebook. This is done by selecting, for each round, the color channel with the maximum intensity. This forms a potential quaternary code that serves as the key for a lookup in the codebook, which tells us which gene the code corresponds to. Decoded genes are assigned to the ```target``` field in the decoded intensity table.
# EPY: END markdown
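The per-round-max rule described above (now packaged as `DecodeSpots.PerRoundMaxChannel`) can be sketched in plain numpy. The intensity values and codebook entries below are hypothetical; the point is only the argmax-per-round lookup.

```python
import numpy as np

CHANNEL_LETTERS = ['T', 'G', 'C', 'A']          # channel order described above
codebook = {'TACG': 'HER2', 'GTTC': 'VIM'}      # hypothetical barcode -> gene mapping

# One spot's intensities, shaped (rounds, channels): 4 rounds x 4 channels.
spot_intensities = np.array([
    [0.9, 0.1, 0.0, 0.2],   # round 0: channel 0 ('T') is brightest
    [0.1, 0.0, 0.1, 0.8],   # round 1: channel 3 ('A')
    [0.0, 0.2, 0.7, 0.1],   # round 2: channel 2 ('C')
    [0.1, 0.9, 0.2, 0.0],   # round 3: channel 1 ('G')
])

# For each round, take the channel with the maximum intensity and map it to a letter,
# then look the resulting barcode up in the codebook.
barcode = ''.join(CHANNEL_LETTERS[ch] for ch in spot_intensities.argmax(axis=1))
print(barcode, codebook.get(barcode, 'nan'))    # -> TACG HER2
```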

# EPY: START code
decoded = experiment.codebook.decode_per_round_max(intensities)
decoded
# EPY: END code

# EPY: START code
# Besides housekeeping genes, VIM and HER2 should be the most highly expressed, which is consistent here.
genes, counts = np.unique(decoded.loc[decoded[Features.PASSES_THRESHOLDS]][Features.TARGET], return_counts=True)
table = pd.Series(counts, index=genes).sort_values(ascending=False)
2 changes: 1 addition & 1 deletion starfish/core/expression_matrix/test/test_serialization.py
@@ -11,7 +11,7 @@ def test_save_expression_matrix():

codebook = codebook_array_factory()

decoded_intensities = factories.synthetic_decoded_intenisty_table(
decoded_intensities = factories.synthetic_decoded_intensity_table(
codebook,
num_z=3,
height=100,
2 changes: 1 addition & 1 deletion starfish/core/intensity_table/test/factories.py
@@ -11,7 +11,7 @@ def synthetic_intensity_table() -> IntensityTable:
return IntensityTable.synthetic_intensities(loaded_codebook(), n_spots=2)


def synthetic_decoded_intenisty_table(
def synthetic_decoded_intensity_table(
codebook,
num_z: int = 12,
height: int = 50,
@@ -8,7 +8,7 @@
from starfish.core.codebook.test.factories import codebook_array_factory
from starfish.core.imagestack.test.factories import imagestack_with_coords_factory
from starfish.core.types import Axes, Coordinates, Features, PhysicalCoordinateTypes
from .factories import synthetic_decoded_intenisty_table
from .factories import synthetic_decoded_intensity_table
from ..intensity_table import IntensityTable
from ..intensity_table_coordinates import (
transfer_physical_coords_to_intensity_table,
@@ -93,7 +93,7 @@ def test_tranfering_physical_coords_to_expression_matrix():
stack = imagestack_with_coords_factory(stack_shape, physical_coords)
codebook = codebook_array_factory()

decoded_intensities = synthetic_decoded_intenisty_table(
decoded_intensities = synthetic_decoded_intensity_table(
codebook,
num_z=stack_shape[Axes.ZPLANE],
height=stack_shape[Axes.Y],
2 changes: 1 addition & 1 deletion starfish/core/spots/DecodeSpots/__init__.py
@@ -1,5 +1,5 @@
from ._base import DecodeSpotsAlgorithm

from .per_round_max_channel_decoder import PerRoundMaxChannel

# autodoc's automodule directive only captures the modules explicitly listed in __all__.
all_filters = {
45 changes: 45 additions & 0 deletions starfish/core/spots/DecodeSpots/per_round_max_channel_decoder.py
@@ -0,0 +1,45 @@
from starfish.core.codebook.codebook import Codebook
from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable
from starfish.core.intensity_table.intensity_table_coordinates import \
transfer_physical_coords_to_intensity_table
from starfish.core.spots.DecodeSpots.trace_builders import build_spot_traces_exact_match
from starfish.core.types import SpotFindingResults
from ._base import DecodeSpotsAlgorithm


class PerRoundMaxChannel(DecodeSpotsAlgorithm):
"""
Decode spots by selecting the max-valued channel in each sequencing round.

Note that this assumes that the codebook contains only one "on" channel per sequencing round,
a common pattern in experiments that assign one fluorophore to each DNA nucleotide and
read DNA sequentially. It is also a characteristic of single-molecule FISH and RNAscope
codebooks.

Parameters
----------
codebook : Codebook
Contains codes to decode IntensityTable

"""

def __init__(self, codebook: Codebook):
self.codebook = codebook

def run(self, spots: SpotFindingResults, *args) -> DecodedIntensityTable:
"""Decode spots by selecting the max-valued channel in each sequencing round

Parameters
----------
spots: SpotFindingResults
A Dict of tile indices and their corresponding measured spots

Returns
-------
DecodedIntensityTable :
IntensityTable decoded and appended with Features.TARGET and Features.QUALITY values.

"""
intensities = build_spot_traces_exact_match(spots)
transfer_physical_coords_to_intensity_table(intensity_table=intensities, spots=spots)
return self.codebook.decode_per_round_max(intensities)
Collaborator:
out of scope: I think Codebook.decode_per_round_max should be brought over to this module. thoughts?

Collaborator (Author):
Yeah, that's actually something I've been thinking about during this refactor, especially since any new decoding method shouldn't have to add code to the Codebook class. I can make that PR.

27 changes: 27 additions & 0 deletions starfish/core/spots/DecodeSpots/trace_builders.py
@@ -0,0 +1,27 @@
from starfish.core.intensity_table.intensity_table import IntensityTable
from starfish.core.types import Axes, Features, SpotFindingResults


def build_spot_traces_exact_match(spot_results: SpotFindingResults) -> IntensityTable:
"""
Combines spots found in matching x/y positions across rounds and channels of
an ImageStack into traces represented as an IntensityTable.

Parameters
-----------
spot_results: SpotFindingResults
Spots found across rounds/channels of an ImageStack
"""
# create IntensityTable with same x/y/z info across all r/ch
spot_attributes = list(spot_results.values())[0]
intensity_table = IntensityTable.zeros(
spot_attributes=spot_attributes,
round_labels=spot_results.round_labels,
ch_labels=spot_results.ch_labels,
)
for r, c in spot_results.keys():
value = spot_results[{Axes.ROUND: r, Axes.CH: c}].data[Features.INTENSITY]
# if no exact match set value to 0
value = 0 if value.empty else value
intensity_table.loc[dict(c=c, r=r)] = value
return intensity_table
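To make the trace-building loop above more concrete, here is a pure-numpy mock of the same idea: spots found at matching x/y positions are stacked into a (spots, channels, rounds) array, with zeros where a (round, channel) tile had no exact match. The shapes and values are hypothetical; the real function fills an `IntensityTable` indexed by the spot attributes of one reference tile.

```python
import numpy as np

n_spots, n_ch, n_rounds = 3, 4, 2

# Hypothetical per-(round, channel) intensity vectors for the same 3 matched spots.
measured = {
    (0, 0): np.array([0.9, 0.2, 0.4]),
    (0, 3): np.array([0.1, 0.8, 0.3]),
    (1, 1): np.array([0.7, 0.1, 0.5]),
    # (round, channel) pairs with no exact match are simply left at zero.
}

traces = np.zeros((n_spots, n_ch, n_rounds))
for (r, c), values in measured.items():
    traces[:, c, r] = values

print(traces.shape)  # (3, 4, 2): one trace per spot, across channels and rounds
```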
5 changes: 4 additions & 1 deletion starfish/core/spots/DetectSpots/blob.py
@@ -147,7 +147,10 @@ def run(
n_processes : Optional[int] = None,
Number of processes to devote to spot finding.
"""

DeprecationWarning("Starfish is embarking on a SpotFinding data structures refactor"
"(See https://github.com/spacetx/starfish/issues/1514) This version of "
"BlobDetection will soon be deleted. To find and decode your spots "
"please instead use FindSpots.BlobDetector followed by DecodeSpots.")
intensity_table = detect_spots(
data_stack=primary_image,
spot_finding_method=self.image_to_spots,
@@ -88,5 +88,4 @@ def test_medium_synthetic_stack():
calculated_intensities = codebook.decode_metric(
calculated_intensities, max_distance=1, min_intensity=0, norm_order=2
)

assert len(calculated_intensities.coords[Features.TARGET]) == 80
2 changes: 1 addition & 1 deletion starfish/core/spots/FindSpots/__init__.py
@@ -1,5 +1,5 @@
from ._base import FindSpotsAlgorithm

from .blob import BlobDetector

# autodoc's automodule directive only captures the modules explicitly listed in __all__.
all_filters = {
7 changes: 5 additions & 2 deletions starfish/core/spots/FindSpots/_base.py
@@ -1,7 +1,8 @@
from abc import abstractmethod
from typing import Callable, Optional, Sequence
from typing import Callable, Optional, Union

import numpy as np
import xarray as xr

from starfish.core.imagestack.imagestack import ImageStack
from starfish.core.pipeline.algorithmbase import AlgorithmBase
@@ -46,7 +47,9 @@ def run(self, image_stack: ImageStack,
raise NotImplementedError()

@staticmethod
def _get_measurement_function(measurement_type: str) -> Callable[[Sequence], Number]:
def _get_measurement_function(
measurement_type: str
) -> Callable[[Union[np.ndarray, xr.DataArray]], Number]:
try:
measurement_function = getattr(np, measurement_type)
except AttributeError:
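The updated signature above still resolves `measurement_type` strings through `getattr(np, ...)`, as the visible lines show. Below is a minimal standalone sketch of that lookup; the error handling after the truncated `except` clause is an assumption, not the library's actual message.

```python
from typing import Callable, Union

import numpy as np
import xarray as xr


def get_measurement_function(
    measurement_type: str
) -> Callable[[Union[np.ndarray, xr.DataArray]], float]:
    # Resolve names like 'mean' or 'max' to the corresponding numpy reduction.
    try:
        return getattr(np, measurement_type)
    except AttributeError:
        raise ValueError(f"measurement_type must name a numpy function, got {measurement_type!r}")


measure = get_measurement_function("mean")
print(measure(np.array([0.1, 0.3, 0.5])))  # 0.3
```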