diff --git a/notebooks/py/smFISH.py b/notebooks/py/smFISH.py index 8ffd314af..f53c88eee 100644 --- a/notebooks/py/smFISH.py +++ b/notebooks/py/smFISH.py @@ -30,7 +30,8 @@ import starfish import starfish.data -from starfish import FieldOfView, IntensityTable +from starfish import FieldOfView, DecodedIntensityTable +from starfish.types import TraceBuildingStrategies # equivalent to %gui qt ipython = get_ipython() @@ -72,7 +73,7 @@ # EPY: END markdown # EPY: START code -tlmpf = starfish.spots.DetectSpots.TrackpyLocalMaxPeakFinder( +tlmpf = starfish.spots.FindSpots.TrackpyLocalMaxPeakFinder( spot_diameter=5, # must be odd integer min_mass=0.02, max_size=2, # this is max radius @@ -96,6 +97,7 @@ import sys print = partial(print, file=sys.stderr) + def processing_pipeline( experiment: starfish.Experiment, fov_name: str, @@ -123,6 +125,11 @@ def processing_pipeline( print("Loading images...") images = enumerate(experiment[fov_name].get_images(FieldOfView.PRIMARY_IMAGES)) + decoder = starfish.spots.DecodeSpots.PerRoundMaxChannel( + codebook=codebook, + trace_building_strategy=TraceBuildingStrategies.SEQUENTIAL + ) + for image_number, primary_image in images: print(f"Filtering image {image_number}...") filter_kwargs = dict( @@ -140,15 +147,14 @@ def processing_pipeline( clip2.run(primary_image, **filter_kwargs) print("Calling spots...") - spot_attributes = tlmpf.run(primary_image) - all_intensities.append(spot_attributes) + spots = tlmpf.run(primary_image) + print("Decoding spots...") + decoded_intensities = decoder.run(spots) + all_intensities.append(decoded_intensities) - spot_attributes = IntensityTable.concatenate_intensity_tables(all_intensities) - - print("Decoding spots...") - decoded = codebook.decode_per_round_max(spot_attributes) + decoded = DecodedIntensityTable.concatenate_intensity_tables(all_intensities) decoded = decoded[decoded["total_intensity"] > .025] - + print("Processing complete.") return primary_image, decoded diff --git a/notebooks/smFISH.ipynb b/notebooks/smFISH.ipynb index 79f9c39e5..ee63bcd32 100644 --- a/notebooks/smFISH.ipynb +++ b/notebooks/smFISH.ipynb @@ -40,7 +40,8 @@ "\n", "import starfish\n", "import starfish.data\n", - "from starfish import FieldOfView, IntensityTable\n", + "from starfish import FieldOfView, DecodedIntensityTable\n", + "from starfish.types import TraceBuildingStrategies\n", "\n", "# equivalent to %gui qt\n", "ipython = get_ipython()\n", @@ -98,7 +99,7 @@ "metadata": {}, "outputs": [], "source": [ - "tlmpf = starfish.spots.DetectSpots.TrackpyLocalMaxPeakFinder(\n", + "tlmpf = starfish.spots.FindSpots.TrackpyLocalMaxPeakFinder(\n", " spot_diameter=5, # must be odd integer\n", " min_mass=0.02,\n", " max_size=2, # this is max radius\n", @@ -130,6 +131,7 @@ "import sys\n", "print = partial(print, file=sys.stderr)\n", "\n", + "\n", "def processing_pipeline(\n", " experiment: starfish.Experiment,\n", " fov_name: str,\n", @@ -157,6 +159,11 @@ " print(\"Loading images...\")\n", " images = enumerate(experiment[fov_name].get_images(FieldOfView.PRIMARY_IMAGES))\n", "\n", + " decoder = starfish.spots.DecodeSpots.PerRoundMaxChannel(\n", + " codebook=codebook,\n", + " trace_building_strategy=TraceBuildingStrategies.SEQUENTIAL\n", + " )\n", + "\n", " for image_number, primary_image in images:\n", " print(f\"Filtering image {image_number}...\")\n", " filter_kwargs = dict(\n", @@ -174,15 +181,14 @@ " clip2.run(primary_image, **filter_kwargs)\n", "\n", " print(\"Calling spots...\")\n", - " spot_attributes = tlmpf.run(primary_image)\n", - " all_intensities.append(spot_attributes)\n", + " spots = tlmpf.run(primary_image)\n", + " print(\"Decoding spots...\")\n", + " decoded_intensities = decoder.run(spots)\n", + " all_intensities.append(decoded_intensities)\n", "\n", - " spot_attributes = IntensityTable.concatenate_intensity_tables(all_intensities)\n", - "\n", - " print(\"Decoding spots...\")\n", - " decoded = codebook.decode_per_round_max(spot_attributes)\n", + " decoded = DecodedIntensityTable.concatenate_intensity_tables(all_intensities)\n", " decoded = decoded[decoded[\"total_intensity\"] > .025]\n", - " \n", + "\n", " print(\"Processing complete.\")\n", "\n", " return primary_image, decoded" diff --git a/starfish/core/spots/DecodeSpots/trace_builders.py b/starfish/core/spots/DecodeSpots/trace_builders.py index 8219054ce..b54ff373d 100644 --- a/starfish/core/spots/DecodeSpots/trace_builders.py +++ b/starfish/core/spots/DecodeSpots/trace_builders.py @@ -1,7 +1,10 @@ from typing import Callable, Mapping +import pandas as pd + from starfish.core.intensity_table.intensity_table import IntensityTable -from starfish.core.types import Axes, Features, SpotFindingResults, TraceBuildingStrategies +from starfish.core.types import Axes, Features, SpotAttributes, SpotFindingResults, \ + TraceBuildingStrategies from .util import _build_intensity_table, _match_spots, _merge_spots_by_round @@ -30,6 +33,40 @@ def build_spot_traces_exact_match(spot_results: SpotFindingResults, **kwargs) -> return intensity_table +def build_traces_sequential(spot_results: SpotFindingResults, **kwargs) -> IntensityTable: + """ + Build spot traces without merging across channels and imaging rounds. Used for sequential + methods like smFIsh. + + Parameters + ---------- + spot_results: SpotFindingResults + Spots found across rounds/channels of an ImageStack + + Returns + ------- + IntensityTable : + concatenated input SpotAttributes, converted to an IntensityTable object + + """ + + all_spots = pd.concat([sa.data for sa in spot_results.values()], sort=True) + + intensity_table = IntensityTable.zeros( + spot_attributes=SpotAttributes(all_spots), + ch_labels=spot_results.ch_labels, + round_labels=spot_results.round_labels, + ) + + i = 0 + for (r, c), attrs in spot_results.items(): + for _, row in attrs.data.iterrows(): + selector = dict(features=i, c=c, r=r) + intensity_table.loc[selector] = row[Features.INTENSITY] + i += 1 + return intensity_table + + def build_traces_nearest_neighbors(spot_results: SpotFindingResults, anchor_round: int=1, search_radius: int=3): """ @@ -65,5 +102,6 @@ def build_traces_nearest_neighbors(spot_results: SpotFindingResults, anchor_roun TRACE_BUILDERS: Mapping[TraceBuildingStrategies, Callable] = { TraceBuildingStrategies.EXACT_MATCH: build_spot_traces_exact_match, - TraceBuildingStrategies.NEAREST_NEIGHBOR: build_traces_nearest_neighbors + TraceBuildingStrategies.NEAREST_NEIGHBOR: build_traces_nearest_neighbors, + TraceBuildingStrategies.SEQUENTIAL: build_traces_sequential, } diff --git a/starfish/core/spots/DetectSpots/trackpy_local_max_peak_finder.py b/starfish/core/spots/DetectSpots/trackpy_local_max_peak_finder.py index 54358ca11..a7cc9fc8b 100644 --- a/starfish/core/spots/DetectSpots/trackpy_local_max_peak_finder.py +++ b/starfish/core/spots/DetectSpots/trackpy_local_max_peak_finder.py @@ -163,6 +163,12 @@ def run( n_processes : Optional[int] = None, Number of processes to devote to spot finding. """ + DeprecationWarning("Starfish is embarking on a SpotFinding data structures refactor" + "(See https://github.com/spacetx/starfish/issues/1514) This version of " + "TrackpyLocalMaxPeakFinder will soon be deleted. To find and decode your" + "spots please instead use FindSpots.TrackpyLocalMaxPeakFinder then " + "DecodeSpots.PerRoundMaxChannel with the parameter " + "TraceBuildingStrategies.SEQUENTIAL. See example in smFISH.py") intensity_table = detect_spots( data_stack=primary_image, spot_finding_method=self.image_to_spots, diff --git a/starfish/core/spots/FindSpots/__init__.py b/starfish/core/spots/FindSpots/__init__.py index b9503c617..7e206c8b5 100644 --- a/starfish/core/spots/FindSpots/__init__.py +++ b/starfish/core/spots/FindSpots/__init__.py @@ -1,5 +1,6 @@ from ._base import FindSpotsAlgorithm from .blob import BlobDetector +from .trackpy_local_max_peak_finder import TrackpyLocalMaxPeakFinder # autodoc's automodule directive only captures the modules explicitly listed in __all__. all_filters = { diff --git a/starfish/core/spots/FindSpots/trackpy_local_max_peak_finder.py b/starfish/core/spots/FindSpots/trackpy_local_max_peak_finder.py new file mode 100644 index 000000000..8af1633c9 --- /dev/null +++ b/starfish/core/spots/FindSpots/trackpy_local_max_peak_finder.py @@ -0,0 +1,183 @@ +import warnings +from functools import partial +from typing import Optional, Tuple, Union + +import numpy as np +import xarray as xr +from trackpy import locate + +from starfish.core.image.Filter.util import determine_axes_to_group_by +from starfish.core.imagestack.imagestack import ImageStack +from starfish.core.spots.FindSpots import spot_finding_utils +from starfish.core.types import Axes, SpotAttributes, SpotFindingResults +from ._base import FindSpotsAlgorithm + + +class TrackpyLocalMaxPeakFinder(FindSpotsAlgorithm): + """ + Find spots using a local max peak finding algorithm + + This is a wrapper for :code:`trackpy.locate`, which implements a version of the + `Crocker-Grier `_ algorithm. + + .. _crocker_grier: https://physics.nyu.edu/grierlab/methods3c/ + + Parameters + ---------- + + spot_diameter : + odd integer or tuple of odd integers. + This may be a single number or a tuple giving the feature’s extent in each dimension, + useful when the dimensions do not have equal resolution (e.g. confocal microscopy). + The tuple order is the same as the image shape, conventionally (z, y, x) or (y, x). + The number(s) must be odd integers. When in doubt, round up. + min_mass : Optional[float] + The minimum integrated brightness. This is a crucial parameter for eliminating spurious + features. Recommended minimum values are 100 for integer images and 1 for float images. + Defaults to 0 (no filtering). + max_size : float + maximum radius-of-gyration of brightness, default None + separation : Union[float, tuple] + Minimum separation between features. Default is diameter + 1. May be a tuple, see + diameter for details. + percentile : float + Features must have a peak brighter than pixels in this percentile. This helps eliminate + spurious peaks. (default = 0) + noise_size : Union[float, tuple] + Width of Gaussian blurring kernel, in pixels Default is 1. May be a tuple, see diameter + for details. + smoothing_size : Union[float, tuple] + The size of the sides of the square kernel used in boxcar (rolling average) smoothing, + in pixels Default is diameter. May be a tuple, making the kernel rectangular. + threshold : float + Clip bandpass result below this value. Thresholding is done on the already + background-subtracted image. By default, 1 for integer images and 1/255 for float + images. + measurement_type : str ['max', 'mean'] + name of the function used to calculate the intensity for each identified spot area + (default = max) + preprocess : boolean + Set to False to turn off bandpass prepossessing. + max_iterations : integer + Max number of loops to refine the center of mass, (default = 10) + is_volume : bool + if True, run the algorithm on 3d volumes of the provided stack (default = False) + verbose : bool + If True, report the percentage completed (default = False) during processing + + Notes + ----- + See also trackpy locate: http://soft-matter.github.io/trackpy/dev/generated/trackpy.locate.html + + """ + + def __init__( + self, spot_diameter, min_mass, max_size, separation, percentile=0, + noise_size: Tuple[int, int, int] = (1, 1, 1), smoothing_size=None, threshold=None, + preprocess: bool = False, max_iterations: int = 10, measurement_type: str = 'max', + is_volume: bool = False, verbose=False, radius_is_gyration: bool = False) -> None: + + self.diameter = spot_diameter + self.minmass = min_mass + self.maxsize = max_size + self.separation = separation + self.noise_size = noise_size + self.smoothing_size = smoothing_size + self.percentile = percentile + self.threshold = threshold + self.measurement_function = self._get_measurement_function(measurement_type) + self.preprocess = preprocess + self.max_iterations = max_iterations + self.is_volume = is_volume + self.verbose = verbose + self.radius_is_gyration = radius_is_gyration + + def image_to_spots(self, data_image: Union[np.ndarray, xr.DataArray]) -> SpotAttributes: + """ + + Parameters + ---------- + data_image : np.ndarray + three-dimensional image containing spots to be detected + + Returns + ------- + SpotAttributes : + spot attributes table for all detected spots + + """ + data_image = np.asarray(data_image) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', FutureWarning) # trackpy numpy indexing warning + warnings.simplefilter('ignore', UserWarning) # yielded if black images + attributes = locate( + data_image, + diameter=self.diameter, + minmass=self.minmass, + maxsize=self.maxsize, + separation=self.separation, + noise_size=self.noise_size, + smoothing_size=self.smoothing_size, + threshold=self.threshold, + percentile=self.percentile, + preprocess=self.preprocess, + max_iterations=self.max_iterations, + ) + + # when zero spots are detected, 'ep' is missing from the trackpy locate results. + if attributes.shape[0] == 0: + attributes['ep'] = [] + + # TODO ambrosejcarr: data should always be at least pseudo-3d, this may not be necessary + # TODO ambrosejcarr: this is where max vs. sum vs. mean would be parametrized. + # here, total_intensity = sum, intensity = max + new_colnames = [ + 'y', 'x', 'total_intensity', 'radius', 'eccentricity', 'intensity', 'raw_mass', 'ep' + ] + if len(data_image.shape) == 3: + attributes.columns = ['z'] + new_colnames + else: + attributes.columns = new_colnames + + attributes['spot_id'] = np.arange(attributes.shape[0]) + return SpotAttributes(attributes) + + def run( + self, + image_stack: ImageStack, + reference_image: Optional[ImageStack] = None, + n_processes: Optional[int] = None, + *args, + ) -> SpotFindingResults: + """ + Find spots. + + Parameters + ---------- + image_stack : ImageStack + ImageStack where we find the spots in. + reference_image : xr.DataArray + (Optional) a reference image. If provided, spots will be found in this image, and then + the locations that correspond to these spots will be measured across each channel. + n_processes : Optional[int] = None, + Number of processes to devote to spot finding. + """ + spot_finding_method = partial(self.image_to_spots, *args) + if reference_image: + data_image = reference_image._squeezed_numpy(*{Axes.ROUND, Axes.CH}) + reference_spots = spot_finding_method(data_image) + results = spot_finding_utils.measure_intensities_at_spot_locations_across_imagestack( + image_stack, + reference_spots, + measurement_function=self.measurement_function, + radius_is_gyration=self.radius_is_gyration) + else: + spot_attributes_list = image_stack.transform( + func=spot_finding_method, + group_by=determine_axes_to_group_by(self.is_volume), + n_processes=n_processes + ) + results = SpotFindingResults(imagestack_coords=image_stack.xarray.coords, + log=image_stack.log, + spot_attributes_list=spot_attributes_list) + return results diff --git a/starfish/core/types/_constants.py b/starfish/core/types/_constants.py index e629d74f6..9ebcf15d4 100644 --- a/starfish/core/types/_constants.py +++ b/starfish/core/types/_constants.py @@ -112,3 +112,4 @@ class TraceBuildingStrategies(AugmentedEnum): """ EXACT_MATCH = 'exact_match' NEAREST_NEIGHBOR = 'nearest_neighbor' + SEQUENTIAL = 'sequential' diff --git a/starfish/data.py b/starfish/data.py index 5b1099f8e..fc37f8784 100644 --- a/starfish/data.py +++ b/starfish/data.py @@ -46,8 +46,11 @@ def allen_smFISH(use_test_data: bool = False) -> Experiment: ------- Experiment """ + if use_test_data: + return Experiment.from_json( + "https://d2nhj9g34unfro.cloudfront.net/20181005/allen_smFISH/experiment.json") return Experiment.from_json( - "https://d2nhj9g34unfro.cloudfront.net/20181005/allen_smFISH/experiment.json") + "https://d26bikfyahveg8.cloudfront.net/smFISH/mouse/formatted_with_DAPI/experiment.json") def DARTFISH(use_test_data: bool = False) -> Experiment: diff --git a/starfish/test/full_pipelines/api/test_allen_smFISH.py b/starfish/test/full_pipelines/api/test_allen_smFISH.py index 4511db1af..f4ff3eaea 100644 --- a/starfish/test/full_pipelines/api/test_allen_smFISH.py +++ b/starfish/test/full_pipelines/api/test_allen_smFISH.py @@ -4,10 +4,11 @@ import starfish.data from starfish import FieldOfView from starfish.image import Filter -from starfish.spots import DetectSpots +from starfish.spots import FindSpots +from starfish.types import TraceBuildingStrategies -@pytest.mark.skip('issues with checksums prevent this data from working properly') +@pytest.mark.skip('This test runs but takes forever') def test_allen_smFISH_cropped_data(): # set random seed to errors provoked by optimization functions @@ -21,7 +22,7 @@ def test_allen_smFISH_cropped_data(): clip = Filter.Clip(p_min=10, p_max=100) clipped_image = clip.run(primary_image, in_place=False) - bandpass = Filter.Bandpass(lshort=0.5, llong=7, threshold=None, truncate=4) + bandpass = Filter.Bandpass(lshort=0.5, llong=7, threshold=0.0, truncate=4) bandpassed_image = bandpass.run(clipped_image, in_place=False) clip = Filter.Clip(p_min=10, p_max=100, is_volume=False) @@ -31,15 +32,22 @@ def test_allen_smFISH_cropped_data(): glp = Filter.GaussianLowPass(sigma=sigma, is_volume=True) z_filtered_image = glp.run(clipped_bandpassed_image, in_place=False) - lmpf = DetectSpots.TrackpyLocalMaxPeakFinder( - spot_diameter=3, - min_mass=300, - max_size=3, - separation=5, - noise_size=0.65, + tlmpf = FindSpots.TrackpyLocalMaxPeakFinder( + spot_diameter=5, # must be odd integer + min_mass=0.02, + max_size=2, # this is max radius + separation=7, + noise_size=0.65, # this is not used because preprocess is False preprocess=False, percentile=10, + # this is irrelevant when min_mass, spot_diameter, and max_size are set properly verbose=True, is_volume=True, ) - intensities = lmpf.run(z_filtered_image) # noqa + spots = tlmpf.run(z_filtered_image) # noqa + + decoder = starfish.spots.DecodeSpots.PerRoundMaxChannel( + codebook=experiment.codebook, + trace_building_strategy=TraceBuildingStrategies.SEQUENTIAL + ) + decoder.run(spots=spots)