Skip to content

Commit

Permalink
Fix dimension ordering on Codebook and IntensityTable (#1600)
Browse files Browse the repository at this point in the history
These two seem to think that 'c' precedes 'r', which is different from the rest of the codebase.
  • Loading branch information
Tony Tung authored Oct 4, 2019
1 parent 912d176 commit 35e8880
Show file tree
Hide file tree
Showing 16 changed files with 84 additions and 89 deletions.
36 changes: 18 additions & 18 deletions starfish/core/codebook/codebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
class Codebook(xr.DataArray):
"""Codebook for an image-based transcriptomics experiment
The codebook is a three dimensional tensor with shape :code:`(feature, channel, round)` whose
The codebook is a three dimensional tensor with shape :code:`(feature, round, channel)` whose
values are the expected intensity of features (spots or pixels) that correspond to each target
(gene or protein) in each of the image tiles of an experiment.
Expand All @@ -44,9 +44,9 @@ class Codebook(xr.DataArray):
Build a codebook using :py:meth:`Codebook.synthetic_one_hot_codebook`::
>>> from starfish import Codebook
>>> sd = Codebook.synthetic_one_hot_codebook(n_channel=3, n_round=4, n_codes=2)
>>> sd = Codebook.synthetic_one_hot_codebook(n_round=4, n_channel=3, n_codes=2)
>>> sd.codebook()
<xarray.Codebook (target: 2, c: 3, r: 4)>
<xarray.Codebook (target: 2, r: 4, c: 3)>
array([[[0, 0, 0, 0],
[0, 0, 1, 1],
[1, 1, 0, 0]],
Expand All @@ -67,25 +67,25 @@ def code_length(self) -> int:
return int(np.dot(*self.shape[1:]))

@classmethod
def zeros(cls, code_names: Sequence[str], n_channel: int, n_round: int):
def zeros(cls, code_names: Sequence[str], n_round: int, n_channel: int):
"""
Create an empty codebook of shape (code_names, n_channel, n_round)
Create an empty codebook of shape (code_names, n_round, n_channel)
Parameters
----------
code_names : Sequence[str]
The targets to be coded.
n_channel : int
Number of channels used to build the codes.
n_round : int
Number of imaging rounds used to build the codes.
n_channel : int
Number of channels used to build the codes.
Examples
--------
Build an empty 2-round 3-channel codebook::
>>> from starfish import Codebook
>>> Codebook.zeros(['ACTA', 'ACTB'], n_channel=3, n_round=2)
>>> Codebook.zeros(['ACTA', 'ACTB'], n_round=2, n_channel=3)
<xarray.Codebook (target: 2, c: 3, r: 2)>
array([[[0, 0],
[0, 0],
Expand All @@ -105,27 +105,27 @@ def zeros(cls, code_names: Sequence[str], n_channel: int, n_round: int):
codebook whose values are all zero
"""
data = np.zeros((len(code_names), n_channel, n_round), dtype=np.uint8)
return cls.from_numpy(code_names, n_channel, n_round, data)
data = np.zeros((len(code_names), n_round, n_channel), dtype=np.uint8)
return cls.from_numpy(code_names, n_round, n_channel, data)

@classmethod
def from_numpy(
cls,
code_names: Sequence[str],
n_channel: int,
n_round: int,
n_channel: int,
data: np.ndarray,
) -> "Codebook":
"""create a codebook of shape (code_names, n_channel, n_round) from a 3-d numpy array
"""create a codebook of shape (code_names, n_round, n_channel) from a 3-d numpy array
Parameters
----------
code_names : Sequence[str]
the targets to be coded
n_channel : int
number of channels used to build the codes
n_round : int
number of imaging rounds used to build the codes
n_channel : int
number of channels used to build the codes
data : np.ndarray
array of uint8 values with len(code_names) x n_round x n_channel elements
Expand Down Expand Up @@ -164,8 +164,8 @@ def from_numpy(
data=data,
coords=(
pd.Index(code_names, name=Features.TARGET),
pd.Index(np.arange(n_channel), name=Axes.CH.value),
pd.Index(np.arange(n_round), name=Axes.ROUND.value),
pd.Index(np.arange(n_channel), name=Axes.CH.value),
)
)

Expand Down Expand Up @@ -284,13 +284,13 @@ def from_code_array(
target_names = [w[Features.TARGET] for w in code_array]

# fill the codebook
data = np.zeros((len(target_names), n_channel, n_round), dtype=np.uint8)
data = np.zeros((len(target_names), n_round, n_channel), dtype=np.uint8)
for i, code_dict in enumerate(code_array):
for bit in code_dict[Features.CODEWORD]:
ch = int(bit[Axes.CH])
r = int(bit[Axes.ROUND])
data[i, ch, r] = int(bit[Features.CODE_VALUE])
return cls.from_numpy(target_names, n_channel, n_round, data)
data[i, r, ch] = int(bit[Features.CODE_VALUE])
return cls.from_numpy(target_names, n_round, n_channel, data)

@classmethod
def open_json(
Expand Down
10 changes: 5 additions & 5 deletions starfish/core/codebook/test/test_approximate_nearest_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ def test_simple_intensities_find_correct_nearest_code():
data = np.array(
[[[0, 0.5],
[0.5, 0]],
[[0, 0.5],
[0, 0.5]],
[[0.5, 0],
[0.5, 0]],
[[0, 0],
[0.5, 0.5]]]
[0.5, 0.5]],
[[0.5, 0.5],
[0, 0]],
[[0, 0.5],
[0, 0.5]]]
)
intensities = intensity_table_factory(data=data)
codebook = codebook_factory()
Expand Down
12 changes: 6 additions & 6 deletions starfish/core/codebook/test/test_from_code_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ def assert_sizes(codebook, check_values=True):
return

# codebook should have 4 "on" combinations
expected_values = np.zeros((2, 3, 2))
expected_values = np.zeros((2, 2, 3))
expected_values[0, 0, 0] = 1
expected_values[0, 1, 1] = 1
expected_values[1, 2, 0] = 1
expected_values[1, 0, 2] = 1
expected_values[1, 1, 1] = 1

assert np.array_equal(codebook.values, expected_values)
Expand Down Expand Up @@ -129,20 +129,20 @@ def test_from_code_array_throws_exception_when_data_is_improperly_formatted():
def test_empty_codebook():
code_array: List = codebook_array_factory()
targets = [x[Features.TARGET] for x in code_array]
codebook = Codebook.zeros(targets, n_channel=3, n_round=2)
codebook = Codebook.zeros(targets, n_round=2, n_channel=3)
assert_sizes(codebook, False)

def test_create_codebook():
code_array: List = codebook_array_factory()
targets = [x[Features.TARGET] for x in code_array]

# Loop performed by from_code_array
data = np.zeros((2, 3, 2), dtype=np.uint8)
data = np.zeros((2, 2, 3), dtype=np.uint8)
for i, code_dict in enumerate(code_array):
for bit in code_dict[Features.CODEWORD]:
ch = int(bit[Axes.CH])
r = int(bit[Axes.ROUND])
data[i, ch, r] = int(bit[Features.CODE_VALUE])
data[i, r, ch] = int(bit[Features.CODE_VALUE])

codebook = Codebook.from_numpy(targets, n_channel=3, n_round=2, data=data)
codebook = Codebook.from_numpy(targets, n_round=2, n_channel=3, data=data)
assert_sizes(codebook)
8 changes: 4 additions & 4 deletions starfish/core/codebook/test/test_metric_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ def test_metric_decode():
match
"""
data = np.array(
[[[0, 3], # this code is decoded "right"
[4, 0]],
[[0, 0.4], # this code should be filtered based on magnitude
[0, 0.3]],
[[[0, 4], # this code is decoded "right"
[3, 0]],
[[0, 0], # this code should be filtered based on magnitude
[0.4, 0.3]],
[[30, 0], # this code should be filtered based on distance
[0, 40]]]
)
Expand Down
13 changes: 8 additions & 5 deletions starfish/core/codebook/test/test_per_round_max_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def intensity_table_factory(data: np.ndarray=np.array([[[0, 3], [4, 0]]])) -> In
spot_attributes = SpotAttributes(spot_attributes_data)
intensity_table = IntensityTable.from_spot_data(
data, spot_attributes,
ch_values=np.arange(data.shape[1]),
round_values=np.arange(data.shape[2]),
round_values=np.arange(data.shape[1]),
ch_values=np.arange(data.shape[2]),
)
return intensity_table

Expand Down Expand Up @@ -54,14 +54,17 @@ def codebook_factory() -> Codebook:
return Codebook.from_code_array(codebook_array)


def test_intensity_tables_with_different_nubmers_of_codes_or_channels_throw_value_error():
def test_intensity_tables_with_different_numbers_of_codes_or_channels_throw_value_error():
"""
The test passes a 3-round and 1-round IntensityTable to a 2-round codebook. Both should
raise a ValueError.
"""
data = np.array(
[[[4, 3, 1],
[4, 0, 2]]]
[[[4, 4],
[3, 0],
[1, 2],
]
]
)
codebook = codebook_factory()
intensities = intensity_table_factory(data)
Expand Down
40 changes: 19 additions & 21 deletions starfish/core/intensity_table/intensity_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,26 +70,26 @@ class IntensityTable(xr.DataArray):
@staticmethod
def _build_xarray_coords(
spot_attributes: SpotAttributes,
round_values: Sequence[int],
channel_values: Sequence[int],
round_values: Sequence[int]
) -> Dict[Hashable, np.ndarray]:
"""build coordinates for intensity-table"""
coordinates = {
k: (Features.AXIS, spot_attributes.data[k].values)
for k in spot_attributes.data}
coordinates.update({
Features.AXIS: np.arange(len(spot_attributes.data)),
Axes.CH.value: np.array(channel_values),
Axes.ROUND.value: np.array(round_values),
Axes.CH.value: np.array(channel_values),
})
return coordinates

@classmethod
def zeros(
cls,
spot_attributes: SpotAttributes,
ch_labels: Sequence[int],
round_labels: Sequence[int],
ch_labels: Sequence[int],
) -> "IntensityTable":
"""
Create an empty intensity table with pre-set shape whose values are zero.
Expand All @@ -99,12 +99,12 @@ def zeros(
spot_attributes : SpotAttributes
Table containing spot metadata. Must contain the values specified in Axes.X,
Y, Z, and RADIUS.
ch_labels : Sequence[int]
The possible values for the channel number, in the order that they are in the ImageStack
5D tensor.
round_labels : Sequence[int]
The possible values for the round number, in the order that they are in the ImageStack
5D tensor.
ch_labels : Sequence[int]
The possible values for the channel number, in the order that they are in the ImageStack
5D tensor.
Returns
-------
Expand All @@ -115,10 +115,9 @@ def zeros(
if not isinstance(spot_attributes, SpotAttributes):
raise TypeError('parameter spot_attributes must be a starfish SpotAttributes object.')

data = np.zeros((spot_attributes.data.shape[0], len(ch_labels), len(round_labels)))
dims = (Features.AXIS, Axes.CH.value, Axes.ROUND.value)
coords = cls._build_xarray_coords(
spot_attributes, np.array(ch_labels), round_labels)
data = np.zeros((spot_attributes.data.shape[0], len(round_labels), len(ch_labels)))
dims = (Features.AXIS, Axes.ROUND.value, Axes.CH.value,)
coords = cls._build_xarray_coords(spot_attributes, round_labels, ch_labels)

intensity_table = cls(
data=data, coords=coords, dims=dims,
Expand All @@ -131,8 +130,8 @@ def from_spot_data(
cls,
intensities: Union[xr.DataArray, np.ndarray],
spot_attributes: SpotAttributes,
ch_values: Sequence[int],
round_values: Sequence[int],
ch_values: Sequence[int],
*args, **kwargs) -> "IntensityTable":
"""
Creates an IntensityTable from a :code:`(features, round, channel)`
Expand Down Expand Up @@ -167,24 +166,24 @@ def from_spot_data(
f'intensities must be a (features * ch * round) 3-d tensor. Provided intensities '
f'shape ({intensities.shape}) is invalid.')

if len(ch_values) != intensities.shape[1]:
if len(round_values) != intensities.shape[1]:
raise ValueError(
f"The number of ch values ({len(ch_values)}) should be equal to intensities' "
f"The number of round values ({len(round_values)}) should be equal to intensities' "
f"shape[1] ({intensities.shape[1]})."
)

if len(round_values) != intensities.shape[2]:
if len(ch_values) != intensities.shape[2]:
raise ValueError(
f"The number of round values ({len(ch_values)}) should be equal to intensities' "
f"The number of ch values ({len(ch_values)}) should be equal to intensities' "
f"shape[2] ({intensities.shape[2]})."
)

if not isinstance(spot_attributes, SpotAttributes):
raise TypeError('parameter spot_attributes must be a starfish SpotAttributes object.')

coords = cls._build_xarray_coords(spot_attributes, ch_values, round_values)
coords = cls._build_xarray_coords(spot_attributes, round_values, ch_values)

dims = (Features.AXIS, Axes.CH.value, Axes.ROUND.value)
dims = (Features.AXIS, Axes.ROUND.value, Axes.CH.value)

intensities = cls(intensities, coords, dims, *args, **kwargs)
return intensities
Expand Down Expand Up @@ -362,13 +361,12 @@ def from_image_stack(
Axes.ZPLANE.value,
Axes.Y.value,
Axes.X.value,
Axes.CH.value,
Axes.ROUND.value,
Axes.CH.value,
)

# (pixels, round, ch)
intensity_data = data.values.reshape(
-1, image_stack.num_chs, image_stack.num_rounds)
intensity_data = data.values.reshape(-1, image_stack.num_rounds, image_stack.num_chs)

# IntensityTable pixel coordinates
z = image_stack.axis_labels(Axes.ZPLANE)
Expand All @@ -388,8 +386,8 @@ def from_image_stack(
return IntensityTable.from_spot_data(
intensity_data,
pixel_coordinates,
image_stack.axis_labels(Axes.CH),
image_stack.axis_labels(Axes.ROUND),
image_stack.axis_labels(Axes.CH),
)

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion starfish/core/intensity_table/test/test_concatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_intensity_table_concatenation():

# slice out z in addition to reduce the total feature number by 1/2
i4 = intensities.where(np.logical_and(intensities.r == 0, intensities.z == 1), drop=True)
expected_shape = (i1.shape[0] + i4.shape[0], 3, 1)
expected_shape = (i1.shape[0] + i4.shape[0], 1, 3)
result = concatenate([i1, i4])

assert expected_shape == result.shape
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ def test_intensity_table_can_be_created_from_spot_attributes():

intensities = IntensityTable.zeros(
spot_attributes,
round_labels=np.arange(3),
ch_labels=np.arange(1),
round_labels=np.arange(3)
)

assert intensities.sizes[Axes.CH] == 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def pixel_intensities_to_imagestack(
# reverses the process used to produce the intensity table in to_pixel_intensities
data = intensities.values.reshape([
*image_shape,
intensities.sizes[Axes.CH],
intensities.sizes[Axes.ROUND]])
data = data.transpose(4, 3, 0, 1, 2)
intensities.sizes[Axes.ROUND],
intensities.sizes[Axes.CH]])
data = data.transpose(3, 4, 0, 1, 2)
return ImageStack.from_numpy(data)
10 changes: 4 additions & 6 deletions starfish/core/intensity_table/test/test_synthetic_intensities.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,7 @@ def test_synthetic_intensity_generation():
assert np.all(intensities[Axes.X.value] <= width)

# both codes should match GENE_B
assert np.array_equal(
np.where(intensities.values),
[[0, 0, 1, 1], # two each in feature 0 & 1
[1, 2, 1, 2], # one each in channel 1 & 2
[1, 0, 1, 0]], # channel 1 matches round 1, channel 2 matches round zero
)
gene_b_intensities = codebook.sel(target="GENE_B")
for feature_id in range(intensities.sizes[Features.AXIS]):
feature_intensities = intensities[{Features.AXIS: feature_id}]
assert np.array_equal(gene_b_intensities.values, feature_intensities.values)
Loading

0 comments on commit 35e8880

Please sign in to comment.