Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for dask and zarr arrays #805

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions doc/WhatsNew.rst
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ Ver 3.3.0 (2022-02-16)
well; they were previously pending deprecation.
- Fixed an issue where image might not be redrawn properly if scale or
pan is set directly via a viewer's settings object (not the usual case)
- Added internal support for dask and zarr arrays

Ver 3.2.0 (2021-06-07)
======================
Expand Down
12 changes: 12 additions & 0 deletions doc/dev_manual/image_wrappers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,18 @@ Once you have an object, you can load data directly contained in a

>>> img.update_keywords(kw_dict)

``AstroImage`` objects also support ``dask`` arrays::

>>> import dask.array as da
>>> data_dk = da.from_array(data, chunks=(100, 100))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just learned from @Cadair that providing a name= is very important, so that dask doesn't try to generate a name from data hash and eats up all the computing time.

>>> img.load_data(data_dk)

and ``zarr`` arrays::

>>> import zarr
>>> data_z = zarr.creation.array(data, chunks=(100, 100))
>>> img.load_data(data_z)

From an ``astropy.io.fits.HDU``::

>>> from astropy.io import fits
Expand Down
2 changes: 0 additions & 2 deletions ginga/AstroImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@ def setup_data(self, data, naxispath=None):
# initialize data attribute to something reasonable
if data is None:
data = np.zeros((0, 0))
elif not isinstance(data, np.ndarray):
data = np.zeros((0, 0))
elif 0 in data.shape:
data = np.zeros((0, 0))

Expand Down
18 changes: 9 additions & 9 deletions ginga/BaseImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ def get_data_xy(self, x, y):
assert (x >= 0) and (y >= 0), \
ImageError("Indexes out of range: (x=%d, y=%d)" % (
x, y))
view = np.s_[y, x]
res = self._slice(view)
view = np.s_[[y], [x]]
res = self._slice(view)[0][0]
if isinstance(res, np.ndarray) and self.get('ignore_alpha', False):
# <-- this image has a "hidden" alpha array
# NOTE: assumes that data is at index 0
Expand Down Expand Up @@ -207,9 +207,9 @@ def clear_all(self):
self._data = np.zeros((0, 0))

def _slice(self, view):
if not isinstance(view, tuple):
view = tuple(view)
return self._get_data()[view]
d_obj = self._get_data()
arr = trcalc.fancy_index(d_obj, view)
return np.asarray(arr)

def get_slice(self, c):
view = [slice(None)] * self.ndim
Expand Down Expand Up @@ -377,11 +377,11 @@ def cutout_cross(self, x, y, radius):
x0, x1 = max(0, x - n), min(wd - 1, x + n)
y0, y1 = max(0, y - n), min(ht - 1, y + n)

xview = np.s_[y, x0:x1 + 1]
yview = np.s_[y0:y1 + 1, x]
xview = np.s_[y:y + 1, x0:x1 + 1]
yview = np.s_[y0:y1 + 1, x:x + 1]

xarr = self._slice(xview)
yarr = self._slice(yview)
xarr = self._slice(xview).ravel()
yarr = self._slice(yview).ravel()

return (x0, y0, xarr, yarr)

Expand Down
85 changes: 85 additions & 0 deletions ginga/tests/test_dask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import hashlib
import numpy as np
import pytest

da = pytest.importorskip('dask.array')
pllim marked this conversation as resolved.
Show resolved Hide resolved

from ginga import AstroImage, trcalc
from ginga.misc import log


class TestDask:
def setup_class(self):
self.logger = log.get_logger("TestDask", null=True)

def _getdata(self, shape, data_np=None):
if data_np is None:
data_np = np.min(np.indices(shape), axis=0)
data_np = data_np.reshape(shape)
data_dk = da.from_array(data_np, chunks=100)
return data_dk

def test_dask_slice_trcalc(self):
"""Test that we can get a subslice of a dask array.
pllim marked this conversation as resolved.
Show resolved Hide resolved
"""
arr_dk = self._getdata((1000, 500))

x_slice, y_slice = slice(12, 499, 3), slice(10, 951, 11)
view = (y_slice, x_slice)
data_dk = trcalc.fancy_index(arr_dk, view)
# type is preserved for slicing dask arrays
assert isinstance(data_dk, da.core.Array)
pllim marked this conversation as resolved.
Show resolved Hide resolved
assert data_dk.shape == (86, 163)
data_np = data_dk.compute()
assert isinstance(data_np[0, 0], np.integer)
res = '177e1ed261ea24df277511078631ec0f95dfc3e781ac15b2d200f0f0040282ae'
m = hashlib.sha256()
m.update(str(data_np.tolist()).encode())
assert m.hexdigest() == res
pllim marked this conversation as resolved.
Show resolved Hide resolved

def test_dask_slice_aimg(self):
"""Test that we can get a subslice of an AstroImage object.
"""
aimg = AstroImage.AstroImage(logger=self.logger)
aimg.set_data(self._getdata((700, 800)))

x_slice, y_slice = slice(12, 800, 8), slice(0, 700, 10)
view = (y_slice, x_slice)
data_np = aimg._slice(view)
assert isinstance(data_np, np.ndarray)
assert data_np.shape == (70, 99)
assert isinstance(data_np[0, 0], np.integer)
res = 'd6f0e61dc54f0c888c8f79d94ead85f8d3c4736efede289ff9946e0091960524'
m = hashlib.sha256()
m.update(str(data_np.tolist()).encode())
assert m.hexdigest() == res

def test_dask_aimg_get_data_xy(self):
"""Test that we can get a single value from an AstroImage object.
"""
aimg = AstroImage.AstroImage(logger=self.logger)
aimg.set_data(self._getdata((5, 5), data_np=np.arange(0, 25)))

val = int(aimg.get_data_xy(3, 3))
assert isinstance(val, int)
assert val == 18

def test_dask_fancy_scale(self):
"""Test that we can get a fancy superslice of a dask array.
"""
arr_dk = self._getdata((5, 5, 5))

p1 = (0, 0, 0)
p2 = (5, 5, 5)
new_dims = (51, 51, 51)

data_np, scales = trcalc.get_scaled_cutout_wdhtdp(arr_dk, p1, p2,
new_dims,
logger=self.logger)
assert isinstance(data_np, np.ndarray)
assert data_np.shape == new_dims
assert isinstance(data_np[0, 0, 0], np.integer)
res = '4d6bb43463f435d76d226c38314fa22a5ba540b7db785b1ccfd2c75d84063fc4'
m = hashlib.sha256()
m.update(str(data_np.tolist()).encode())
assert m.hexdigest() == res
79 changes: 79 additions & 0 deletions ginga/tests/test_numpy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import hashlib
import numpy as np

from ginga import AstroImage, trcalc
from ginga.misc import log


class TestNumpy:
def setup_class(self):
self.logger = log.get_logger("TestNumpy", null=True)

def _getdata(self, shape, data_np=None):
if data_np is None:
data_np = np.min(np.indices(shape), axis=0)
data_np = data_np.reshape(shape)
return data_np

def test_numpy_slice_trcalc(self):
"""Test that we can get a subslice of an array.
"""
arr_np = self._getdata((1000, 500))

x_slice, y_slice = slice(12, 499, 3), slice(10, 951, 11)
view = (y_slice, x_slice)
data_np = trcalc.fancy_index(arr_np, view)
assert isinstance(data_np, np.ndarray)
assert data_np.shape == (86, 163)
pllim marked this conversation as resolved.
Show resolved Hide resolved
assert isinstance(data_np[0, 0], np.integer)
res = '177e1ed261ea24df277511078631ec0f95dfc3e781ac15b2d200f0f0040282ae'
m = hashlib.sha256()
m.update(str(data_np.tolist()).encode())
assert m.hexdigest() == res

def test_numpy_slice_aimg(self):
"""Test that we can get a subslice of an AstroImage object.
"""
aimg = AstroImage.AstroImage(logger=self.logger)
aimg.set_data(self._getdata((700, 800)))

x_slice, y_slice = slice(12, 800, 8), slice(0, 700, 10)
view = (y_slice, x_slice)
data_np = aimg._slice(view)
assert isinstance(data_np, np.ndarray)
assert data_np.shape == (70, 99)
assert isinstance(data_np[0, 0], np.integer)
res = 'd6f0e61dc54f0c888c8f79d94ead85f8d3c4736efede289ff9946e0091960524'
m = hashlib.sha256()
m.update(str(data_np.tolist()).encode())
assert m.hexdigest() == res

def test_numpy_aimg_getdata_xy(self):
"""Test that we can get a single value from an AstroImage object.
"""
aimg = AstroImage.AstroImage(logger=self.logger)
aimg.set_data(self._getdata((5, 5), data_np=np.arange(0, 25)))

val = int(aimg.get_data_xy(3, 3))
assert isinstance(val, int)
assert val == 18

def test_numpy_fancy_scale(self):
"""Test that we can get a fancy superslice of a numpy array.
"""
arr_np = self._getdata((5, 5, 5))

p1 = (0, 0, 0)
p2 = (5, 5, 5)
new_dims = (51, 51, 51)

data_np, scales = trcalc.get_scaled_cutout_wdhtdp(arr_np, p1, p2,
new_dims,
logger=self.logger)
assert isinstance(data_np, np.ndarray)
assert data_np.shape == new_dims
assert isinstance(data_np[0, 0, 0], np.integer)
res = '4d6bb43463f435d76d226c38314fa22a5ba540b7db785b1ccfd2c75d84063fc4'
m = hashlib.sha256()
m.update(str(data_np.tolist()).encode())
assert m.hexdigest() == res
83 changes: 31 additions & 52 deletions ginga/tests/test_trcalc.py
Original file line number Diff line number Diff line change
@@ -1,81 +1,60 @@

import hashlib
import numpy as np

from ginga import trcalc


class TestTrcalc:

def _2ddata(self):
data = np.zeros((10, 10), dtype=int)
for i in range(10):
for j in range(10):
data[i, j] = min(i, j)
return data

def _3ddata(self):
data = np.zeros((10, 10, 10), dtype=int)
for i in range(10):
for j in range(10):
for k in range(10):
data[i, j, k] = min(i, j, k)
return data
def _2ddata(self, shape, data_np=None):
if data_np is None:
data_np = np.asarray([min(i, j)
for i in range(shape[0])
for j in range(shape[1])])
data_np = data_np.reshape(shape)
return data_np

def _3ddata(self, shape, data_np=None):
if data_np is None:
data_np = np.asarray([min(i, j, k)
for i in range(shape[0])
for j in range(shape[1])
for k in range(shape[2])])
data_np = data_np.reshape(shape)
return data_np

def test_get_scaled_cutout_wdht_view(self):

data = self._2ddata()
data = self._2ddata((10, 10))
p1 = (2, 2)
p2 = (4, 4)
nd = (8, 10)

res = np.asarray([[2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 2, 2, 2, 2, 2],
[2, 2, 2, 3, 3, 3, 3, 3],
[2, 2, 2, 3, 3, 3, 3, 3],
[2, 2, 2, 3, 3, 3, 3, 3],
[2, 2, 2, 3, 3, 3, 4, 4],
[2, 2, 2, 3, 3, 3, 4, 4],
[2, 2, 2, 3, 3, 3, 4, 4]])

view, scales = trcalc.get_scaled_cutout_wdht_view(data.shape,
p1[0], p1[1],
p2[0], p2[1],
nd[0], nd[1])
new_data = data[view]
new_data = trcalc.fancy_index(data, view)
assert new_data.shape == (10, 8)
assert np.allclose(new_data, res)
assert isinstance(new_data[0, 0], np.integer)
res = 'dc025d4e14db5529c581cbe24f0616721bb33f63aabcfcc0d432edf00d8cdc2d'
m = hashlib.sha256()
m.update(str(new_data.tolist()).encode())
assert m.hexdigest() == res

def test_get_scaled_cutout_wdhtdp_view(self):

data = self._3ddata()
data = self._3ddata((10, 10, 10))
p1 = (0, 0, 0)
p2 = (9, 9, 9)
nd = (4, 4, 4)

res = np.asarray([[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],

[[0, 0, 0, 0],
[0, 2, 2, 2],
[0, 2, 2, 2],
[0, 2, 2, 2]],

[[0, 0, 0, 0],
[0, 2, 2, 2],
[0, 2, 5, 5],
[0, 2, 5, 5]],

[[0, 0, 0, 0],
[0, 2, 2, 2],
[0, 2, 5, 5],
[0, 2, 5, 7]]])

view, scales = trcalc.get_scaled_cutout_wdhtdp_view(data.shape,
p1, p2, nd)
new_data = data[view]
new_data = trcalc.fancy_index(data, view)
assert new_data.shape == (4, 4, 4)
assert np.allclose(new_data, res)
assert isinstance(new_data[0, 0, 0], np.integer)
res = 'c01c00af06fb2dc5c8cd6cf96927ba6ddd8d2caba3fc33074c9eaab5cc0ac498'
m = hashlib.sha256()
m.update(str(new_data.tolist()).encode())
assert m.hexdigest() == res
Loading