-
Notifications
You must be signed in to change notification settings - Fork 2
/
reader.py
199 lines (157 loc) · 6.02 KB
/
reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import os
import warnings
from typing import Optional, Union
import dask
import dask.array as da
import numpy as np
from skimage import io
from .transform import parse_transforms
from .utils import (
Channels,
crop_image,
parse_filename,
remove_background,
remove_outliers,
)
class DaskOctopusLiteLoader:
    """Load multidimensional image stacks using lazy loading.

    A simple class to load OctopusLite data from a directory. The directory
    is scanned once at construction; images themselves are only read when the
    dask arrays are computed. Channels can be addressed either with the
    `Channels` enumerator or by their (case-insensitive) name.

    Parameters
    ----------
    path : str
        The path to the dataset.
    crop : tuple, optional
        An optional tuple which can be used to perform a centred crop on the
        data.
    transforms : os.PathLike, optional
        Path to the transform matrix, handed to `parse_transforms`. The
        resulting transformer is applied to every image before cropping.
    remove_background : bool
        Use an estimated polynomial surface to remove uneven illumination.
        Not applied to channels whose name starts with "MASK" or "WEIGHTS".

    Methods
    -------
    __getitem__ : Channels, str
        Return a dask lazy array of the image data for the channel. If
        cropping has been specified, the images are also cropped to this size.
    files(channel_name) :
        Return the pathnames of all channel_name images found in the dataset,
        sorted by time.

    Properties
    ----------
    shape :
        Shape of a single frame as declared to dask: the crop if one was
        given, otherwise the shape of the sample image read at construction.
    channels :
        Return the channels found in the dataset.

    Usage
    -----
    >>> octopus = DaskOctopusLiteLoader('/path/to/your/data/')
    >>> gfp = octopus["GFP"]
    >>> gfp_filenames = octopus.files("GFP")
    """

    def __init__(
        self,
        path: str,
        crop: Optional[tuple] = None,
        transforms: Optional[os.PathLike] = None,
        remove_background: bool = True,
    ):
        self.path = path
        self._files = {}
        self._lazy_arrays = {}
        self._crop = crop
        self._shape = ()
        self._remove_background = remove_background

        if self._crop is not None:
            print(f"Using cropping: {crop}")

        # Scan the directory and build the lazy arrays. Nothing is loaded
        # through `_load_and_process` until the arrays are computed, so it is
        # fine that the transformer is only set up afterwards.
        self._parse_files()

        self._transformer = parse_transforms(transforms) if transforms else None

    def __contains__(self, channel):
        """Return True if `channel` (a `Channels` member or its name) is present."""
        if channel in self.channels:
            return True
        # Mirror `__getitem__`, which also accepts case-insensitive names.
        if isinstance(channel, str):
            try:
                return Channels[channel.upper()] in self._files
            except KeyError:
                return False
        return False

    @property
    def channels(self):
        """List of the channels for which at least one image was found."""
        return list(self._files.keys())

    @property
    def shape(self):
        """Per-frame shape used when declaring the lazy arrays."""
        return self._shape

    def __getitem__(self, channel_name: Union[str, Channels]):
        """Return the lazy (time-stacked) dask array for a channel.

        Raises
        ------
        KeyError
            If a string is given that is not the name of a `Channels` member.
        ValueError
            If the channel has no images in this dataset.
        """
        if isinstance(channel_name, str):
            channel_name = Channels[channel_name.upper()]

        if channel_name not in self.channels:
            raise ValueError(f"Channel {channel_name} not found in {self.path}")

        return self._lazy_arrays[channel_name]

    def files(self, channel_name: str) -> list:
        """Return the time-sorted list of image paths for `channel_name`."""
        return self._files[Channels[channel_name.upper()]]

    def _load_and_process(self, fn: str) -> np.ndarray:
        """Load one image and apply transform, crop and background removal.

        This is the function wrapped by `dask.delayed`, so it may be called
        from several worker threads at once. It must therefore not mutate
        shared state such as the per-channel file lists.
        """
        image = io.imread(fn)
        channel = parse_filename(fn)["channel"]

        if self._transformer is not None:
            # Need to use the index of the file (not the time parsed from its
            # name) as some frames may have been removed. The list was sorted
            # by time in `_parse_files`; re-sorting it in place here would be
            # redundant and unsafe, because a list appears empty to other
            # threads while it is being sorted.
            idx = self._files[channel].index(fn)
            image = self._transformer(image, idx)

        if self._crop is not None:
            assert isinstance(self._crop, tuple), "crop must be a tuple"
            crop = np.array(self._crop).astype(np.int64)
            # check that we don't exceed any dimensions
            assert all(
                crop[i] <= s for i, s in enumerate(image.shape)
            ), "crop exceeds the image dimensions"
            image = crop_image(image, crop)

        # labels cannot be preprocessed so return here
        if channel.name.startswith(("MASK", "WEIGHTS")):
            return image

        if self._remove_background:
            cleaned = remove_outliers(image)
            image = remove_background(cleaned)
            if self._crop is None:
                warnings.warn(
                    "Background removal works best on cropped, aligned image. Will fail on uncropped, aligned images due to border effect."
                )

        return image

    def _parse_files(self):
        """Parse out the files from the folder and create lazy arrays.

        Raises
        ------
        FileNotFoundError
            If the directory contains no ``.tif`` / ``.tiff`` files.
        """
        # find the files in the dataset
        files = [
            os.path.join(self.path, f)
            for f in os.listdir(self.path)
            if f.endswith((".tif", ".tiff"))
        ]

        if not files:
            raise FileNotFoundError(f"No files found in directory: {self.path}")

        # take a sample of the dataset to learn the frame shape and dtype
        sample = io.imread(files[0])
        self._shape = sample.shape if self._crop is None else self._crop

        # group the files by channel
        channels = {k: [] for k in Channels}
        for f in files:
            channels[parse_filename(f)["channel"]].append(f)

        # sort each channel by time; `_load_and_process` relies on this order
        for channel_files in channels.values():
            channel_files.sort(key=lambda f: parse_filename(f)["time"])

        # set the output type
        # NOTE(review): label channels (MASK*/WEIGHTS*) skip background
        # removal but are still declared with this dtype — confirm that the
        # declared and actual dtypes agree for those channels.
        dtype = np.float32 if self._remove_background else sample.dtype

        # remove any channels that are empty
        self._files = {k: v for k, v in channels.items() if v}

        # now set up the lazy loaders, one delayed read per file
        for channel, channel_files in self._files.items():
            frames = [
                da.from_delayed(
                    dask.delayed(self._load_and_process)(fn),
                    shape=self._shape,
                    dtype=dtype,
                )
                for fn in channel_files
            ]
            # concatenate them along the time axis
            self._lazy_arrays[channel] = da.stack(frames, axis=0)