Skip to content

Commit

Permalink
adding DecodedIntensityTable (#1489)
Browse files Browse the repository at this point in the history
  • Loading branch information
Shannon Axelrod authored Sep 19, 2019
1 parent c16c8de commit e325fb9
Show file tree
Hide file tree
Showing 19 changed files with 333 additions and 175 deletions.
7 changes: 7 additions & 0 deletions docs/source/api/data_structures/decoded_intensity_table.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.. _DecodedIntensityTable:

DecodedIntensityTable
=====================

.. autoclass:: starfish.core.intensity_table.decoded_intensity_table.DecodedIntensityTable
:members:
3 changes: 3 additions & 0 deletions docs/source/api/data_structures/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,6 @@ serialization for use in single-cell analysis environments such as Seurat_ and S

.. toctree::
intensity_table.rst

.. toctree::
decoded_intensity_table.rst
3 changes: 3 additions & 0 deletions docs/source/help_and_reference/glossary/glossary.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ A feature that is the target of quantification by an image-based assays. Common
### IntensityTable
An intensity Table contains the features identified in an ImageStack. It can be thought of as an array whose entries are the intensities of each feature across the imaging rounds and channels of a field of view. Starfish exposes several processing tools to decode the features of the table, estimate their qualities, and assign features to cells.

### DecodedIntensityTable
A representation of a decoded intensity table. Contains the features identified in an ImageStack as well as their associated target values.

### Codeword
A codeword maps expected intensities across multiple image tiles within a field of view to the target that is encoded by the codeword.

Expand Down
1 change: 1 addition & 0 deletions starfish/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .core.experiment.experiment import Experiment, FieldOfView
from .core.expression_matrix.expression_matrix import ExpressionMatrix
from .core.imagestack.imagestack import ImageStack
from .core.intensity_table.decoded_intensity_table import DecodedIntensityTable
from .core.intensity_table.intensity_table import IntensityTable
from .core.segmentation_mask import SegmentationMaskCollection
from .core.starfish import starfish
44 changes: 23 additions & 21 deletions starfish/core/codebook/codebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
MIN_SUPPORTED_VERSION,
)
from starfish.core.config import StarfishConfig
from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable
from starfish.core.intensity_table.intensity_table import IntensityTable
from starfish.core.spacetx_format.util import CodebookValidator
from starfish.core.types import Axes, Features, Number
Expand Down Expand Up @@ -506,7 +507,7 @@ def _validate_decode_intensity_input_matches_codebook_shape(
def decode_metric(
self, intensities: IntensityTable, max_distance: Number, min_intensity: Number,
norm_order: int, metric: str='euclidean'
) -> IntensityTable:
) -> DecodedIntensityTable:
"""
Assigns intensity patterns that have been extracted from an :py:class:`ImageStack` and
stored in an :py:class:`IntensityTable` by a :py:class:`SpotFinder` to the gene targets that
Expand Down Expand Up @@ -552,10 +553,11 @@ def decode_metric(

# add empty metadata fields and return
if intensities.sizes[Features.AXIS] == 0:
intensities[Features.TARGET] = (Features.AXIS, np.empty(0, dtype='U'))
intensities[Features.DISTANCE] = (Features.AXIS, np.empty(0, dtype=float))
intensities[Features.PASSES_THRESHOLDS] = (Features.AXIS, np.empty(0, dtype=bool))
return intensities
return DecodedIntensityTable.from_intensity_table(
intensities,
targets=(Features.AXIS, np.empty(0, dtype='U')),
distances=(Features.AXIS, np.empty(0, dtype=np.float64)),
passes_threshold=(Features.AXIS, np.empty(0, dtype=bool)))

# normalize both the intensities and the codebook
norm_intensities, norms = self._normalize_features(intensities, norm_order=norm_order)
Expand All @@ -571,15 +573,14 @@ def decode_metric(
dtype=np.bool
)

# set targets, distances, and filtering results
norm_intensities[Features.TARGET] = (Features.AXIS, targets)
norm_intensities[Features.DISTANCE] = (Features.AXIS, metric_outputs)
norm_intensities[Features.PASSES_THRESHOLDS] = (Features.AXIS, passes_filters)

# norm_intensities is a DataArray, make it back into an IntensityTable
return IntensityTable(norm_intensities)
return DecodedIntensityTable.from_intensity_table(
norm_intensities,
targets=(Features.AXIS, targets),
distances=(Features.AXIS, metric_outputs),
passes_threshold=(Features.AXIS, passes_filters))

def decode_per_round_max(self, intensities: IntensityTable) -> IntensityTable:
def decode_per_round_max(self, intensities: IntensityTable) -> DecodedIntensityTable:
"""
Assigns intensity patterns that have been extracted from an :py:class:`ImageStack` and
stored in an :py:class:`IntensityTable` by a :py:class:`SpotFinder` to the gene targets that
Expand Down Expand Up @@ -641,10 +642,11 @@ def _view_row_as_element(array: np.ndarray) -> np.ndarray:

# add empty metadata fields and return
if intensities.sizes[Features.AXIS] == 0:
intensities[Features.TARGET] = (Features.AXIS, np.empty(0, dtype='U'))
intensities[Features.DISTANCE] = (Features.AXIS, np.empty(0, dtype=float))
intensities[Features.PASSES_THRESHOLDS] = (Features.AXIS, np.empty(0, dtype=bool))
return intensities
return DecodedIntensityTable.from_intensity_table(
intensities,
targets=(Features.AXIS, np.empty(0, dtype='U')),
distances=(Features.AXIS, np.empty(0, dtype=np.float64)),
passes_threshold=(Features.AXIS, np.empty(0, dtype=bool)))

max_channels = intensities.argmax(Axes.CH.value)
codes = self.argmax(Axes.CH.value)
Expand All @@ -668,11 +670,11 @@ def _view_row_as_element(array: np.ndarray) -> np.ndarray:
# a code passes filters if it decodes successfully
passes_filters = ~pd.isnull(targets)

intensities[Features.TARGET] = (Features.AXIS, targets.astype('U'))
intensities[Features.DISTANCE] = (Features.AXIS, distance)
intensities[Features.PASSES_THRESHOLDS] = (Features.AXIS, passes_filters)

return intensities
return DecodedIntensityTable.from_intensity_table(
intensities,
targets=(Features.AXIS, targets.astype('U')),
distances=(Features.AXIS, distance),
passes_threshold=(Features.AXIS, passes_filters))

@classmethod
def synthetic_one_hot_codebook(
Expand Down
9 changes: 5 additions & 4 deletions starfish/core/expression_matrix/test/test_serialization.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import random

from starfish import IntensityTable
from starfish.core.codebook.test.factories import codebook_array_factory
from starfish.core.intensity_table.test import factories
from starfish.core.types import Features

NUMBER_SPOTS = 10
Expand All @@ -11,7 +11,7 @@ def test_save_expression_matrix():

codebook = codebook_array_factory()

intensities = IntensityTable.synthetic_intensities(
decoded_intensities = factories.synthetic_decoded_intenisty_table(
codebook,
num_z=3,
height=100,
Expand All @@ -20,9 +20,10 @@ def test_save_expression_matrix():
)
# mock out come cell_ids
cell_ids = random.sample(range(1, 20), NUMBER_SPOTS)
intensities[Features.CELL_ID] = (Features.AXIS, cell_ids)

expression_matrix = intensities.to_expression_matrix()
decoded_intensities[Features.CELL_ID] = (Features.AXIS, cell_ids)

expression_matrix = decoded_intensities.to_expression_matrix()

# test all saving methods
expression_matrix.save("expression")
176 changes: 176 additions & 0 deletions starfish/core/intensity_table/decoded_intensity_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
from typing import Optional, Tuple

import numpy as np
import pandas as pd

from starfish.core.expression_matrix.expression_matrix import ExpressionMatrix
from starfish.core.intensity_table.intensity_table import IntensityTable
from starfish.core.types import (
Axes,
Coordinates,
DecodedSpots,
Features,
)


class DecodedIntensityTable(IntensityTable):
"""
DecodedIntensityTable is a container for spot or pixel features extracted from image data
that have been decoded. It is the primary output from starfish :py:class:`Decode` methods.
An IntensityTable records the numeric intensity of a set of features in each
:code:`(round, channel)` tile in which the feature is identified.
The :py:class:`IntensityTable` has shape
:code:`(n_feature, n_channel, n_round)`.
Some :py:class:`SpotFinder` methods identify a position and search for Gaussian blobs in a
small radius, only recording intensities if they are found in a given tile. Other
:py:class:SpotFinder: approaches find blobs in a max-projection and measure them everywhere.
As a result, some IntensityTables will be dense, and others will contain :code:`np.nan`
entries where no feature was detected.
Examples
--------
Create an IntensityTable using the ``synthetic_intensities`` method::
>>> from starfish.core.test.factories import SyntheticData
>>> sd = SyntheticData(n_ch=3, n_round=4, n_codes=2)
>>> codes = sd.codebook()
>>> sd.intensities(codebook=codes)
<xarray.IntensityTable (features: 2, c: 3, h: 4)>
array([[[ 0., 0., 0., 0.],
[ 0., 0., 8022., 12412.],
[11160., 9546., 0., 0.]],
[[ 0., 0., 0., 0.],
[ 0., 0., 10506., 10830.],
[11172., 12331., 0., 0.]]])
Coordinates:
* features (features) MultiIndex
- z (features) int64 7 3
- y (features) int64 14 32
- x (features) int64 32 15
- r (features) float64 nan nan
* c (c) int64 0 1 2
* h (h) int64 0 1 2 3
target (features) object 08b1a822-a1b4-4e06-81ea-8a4bd2b004a9 ...
"""

@classmethod
def from_intensity_table(
cls,
intensities: IntensityTable,
targets: Tuple[str, np.ndarray],
distances: Optional[Tuple[str, np.ndarray]] = None,
passes_threshold: Optional[Tuple[str, np.ndarray]] = None):
"""
Assign target values to intensities.
Parameters
----------
intensities : IntensityTable
intensity_table to assign target values to
targets : Tuple[str, np.ndarray]
Target values to assign
distances : Optional[Tuple[str, np.ndarray]]
Corresponding array of distances from target for each feature
passes_threshold : Optional[Tuple[str, np.ndarray]]
Corresponding array of boolean values indicating if each itensity passed
given thresholds.
Returns
-------
DecodedIntensityTable
"""

intensities = cls(intensities)
intensities[Features.TARGET] = targets
if distances:
intensities[Features.DISTANCE] = distances
if passes_threshold:
intensities[Features.PASSES_THRESHOLDS] = passes_threshold
return intensities

def to_decoded_dataframe(self) -> DecodedSpots:
"""
Generates a dataframe containing decoded spot information. Guaranteed to contain physical
spot coordinates (z, y, x) and gene target. Does not contain pixel coordinates.
"""
df = self.to_features_dataframe()
pixel_coordinates = pd.Index([Axes.X, Axes.Y, Axes.ZPLANE])
df = df.drop(pixel_coordinates.intersection(df.columns), axis=1).drop(Features.AXIS, axis=1)
return DecodedSpots(df)

def to_mermaid(self, filename: str) -> pd.DataFrame:
"""
Writes a .csv.gz file in columnar format that is readable by MERMAID visualization
software.
To run MERMAID, follow the installation instructions for that repository and simply
replace the data.csv.gz file with the output of this function.
Parameters
----------
filename : str
Name for compressed-gzipped MERMAID data file. Should end in '.csv.gz'.
Notes
------
See also https://github.com/JEFworks/MERmaid
"""
# construct the MERMAID dataframe. As MERMAID adds support for non-categorical variables,
# additional columns can be added here
df = self.to_features_dataframe()
column_order = [
Axes.X,
Axes.Y,
Features.TARGET,
Features.TARGET, # added twice to support simultaneous coding
]
mermaid_data = df[column_order]

# write to disk
mermaid_data.to_csv(filename, compression='gzip', index=False)

def to_expression_matrix(self) -> ExpressionMatrix:
"""
Generates a cell x gene count matrix where each cell is annotated with spatial metadata.
Requires that spots in the IntensityTable have been assigned to cells.
Returns
-------
ExpressionMatrix :
cell x gene expression table
"""
if Features.CELL_ID not in self.coords:
raise KeyError(
"IntensityTable must have 'cell_id' assignments for each cell before this function "
"can be called. See starfish.spots.AssignTargets.Label.")
grouped = self.to_features_dataframe().groupby([Features.CELL_ID, Features.TARGET])
counts = grouped.count().iloc[:, 0].unstack().fillna(0)
if self.has_physical_coords:
grouped = self.to_features_dataframe().groupby([Features.CELL_ID])[[
Axes.X.value, Axes.Y.value, Axes.ZPLANE.value, Coordinates.X.value,
Coordinates.Y.value, Coordinates.Z.value]]
else:
grouped = self.to_features_dataframe().groupby([Features.CELL_ID])[[
Axes.X.value, Axes.Y.value, Axes.ZPLANE.value]]
min_ = grouped.min()
max_ = grouped.max()
coordinate_df = min_ + (max_ - min_) / 2
metadata = {name: (Features.CELLS, data.values) for name, data in coordinate_df.items()}
metadata[Features.AREA] = (Features.CELLS, np.full(counts.shape[0], fill_value=np.nan))
# add genes to the metadata
metadata.update({Features.GENES: counts.columns.values})
metadata.update({Features.CELL_ID: (Features.CELLS, counts.index.values)})

mat = ExpressionMatrix(
data=counts.values,
dims=(Features.CELLS, Features.GENES),
coords=metadata,
name='expression_matrix'
)
return mat
Loading

0 comments on commit e325fb9

Please sign in to comment.