From 01b3add10e8a4312b24dcf1a01097c2a1961d055 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Tue, 29 May 2018 11:42:23 +0300 Subject: [PATCH 1/7] add downsample_axis code from image_tools. Definitely need a test, since I don't really know what's going on with WCS yet --- spectral_cube/spectral_cube.py | 62 ++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py index 99f1a66b4..8d2d5797b 100644 --- a/spectral_cube/spectral_cube.py +++ b/spectral_cube/spectral_cube.py @@ -2956,6 +2956,68 @@ def mask_channels(self, goodchannels): return self.with_mask(goodchannels[:,None,None]) + @warn_slow + def downsample_axis(myarr, factor, axis, estimator=numpy.nanmean, truncate=False): + """ + Downsample the cube by averaging over *factor* pixels along an axis. + Crops right side if the shape is not a multiple of factor. + + The WCS will be 'downsampled' by the specified factor as well. + If the downsample factor is odd, there will be an offset in the WCS. + + (original code came from image_tools) + + Parameters + ---------- + myarr : `~numpy.ndarray` + The array to downsample + factor : int + The factor to downsample by + axis : int + The axis to downsample along + estimator : function + defaults to mean. You can downsample by summing or + something else if you want a different estimator + (e.g., downsampling error: you want to sum & divide by sqrt(n)) + truncate : bool + Whether to truncate the last chunk or average over a smaller number. + e.g., if you downsample [1,2,3,4] by a factor of 3, you could get either + [2] or [2,4] if truncate is True or False, respectively. + """ + # size of the dimension of interest + xs = myarr.shape[axis] + + if xs % int(factor) != 0: + if truncate: + view = [slice(None) for ii in range(myarr.ndim)] + view[axis] = slice(None,xs-(xs % int(factor))) + crarr = myarr[view] + else: + newshape = list(myarr.shape) + newshape[axis] = (factor - xs % int(factor)) + extension = numpy.empty(newshape) * numpy.nan + crarr = numpy.concatenate((myarr,extension), axis=axis) + else: + crarr = myarr + + def makeslice(startpoint,axis=axis,step=factor): + # make empty slices + view = [slice(None) for ii in range(myarr.ndim)] + # then fill the appropriate slice + view[axis] = slice(startpoint,None,step) + return view + + # The extra braces here are crucial: We're adding an extra dimension so we + # can average across it + stacked_array = numpy.concatenate([[crarr[makeslice(ii)]] for ii in + range(factor)]) + + dsarr = estimator(stacked_array, axis=0) + + newwcs = self.wcs[makeslice(factor//2)] + + return self._new_cube_with(data=dsarr, wcs=newwcs) + class VaryingResolutionSpectralCube(BaseSpectralCube, MultiBeamMixinClass): """ A variant of the SpectralCube class that has PSF (beam) information on a From 814c3b4aa32ba32cb8614dadcbfd4c847450309b Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Tue, 29 May 2018 17:54:03 +0300 Subject: [PATCH 2/7] add a test and perform many corrections --- spectral_cube/spectral_cube.py | 32 ++++++++++++++++++++---------- spectral_cube/tests/test_regrid.py | 21 ++++++++++++++++++++ 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py index 8d2d5797b..a172ec4c9 100644 --- a/spectral_cube/spectral_cube.py +++ b/spectral_cube/spectral_cube.py @@ -2957,7 +2957,7 @@ def mask_channels(self, goodchannels): @warn_slow - def downsample_axis(myarr, factor, axis, estimator=numpy.nanmean, truncate=False): + def downsample_axis(self, factor, axis, estimator=np.nanmean, truncate=False): """ Downsample the cube by averaging over *factor* pixels along an axis. Crops right side if the shape is not a multiple of factor. @@ -2985,38 +2985,48 @@ def downsample_axis(myarr, factor, axis, estimator=numpy.nanmean, truncate=False [2] or [2,4] if truncate is True or False, respectively. """ # size of the dimension of interest - xs = myarr.shape[axis] + xs = self.shape[axis] if xs % int(factor) != 0: if truncate: - view = [slice(None) for ii in range(myarr.ndim)] + view = [slice(None) for ii in range(self.ndim)] view[axis] = slice(None,xs-(xs % int(factor))) - crarr = myarr[view] + crarr = self.filled_data[view] + mask = self.mask[view].include() else: - newshape = list(myarr.shape) + newshape = list(self.shape) newshape[axis] = (factor - xs % int(factor)) - extension = numpy.empty(newshape) * numpy.nan - crarr = numpy.concatenate((myarr,extension), axis=axis) + extension = np.empty(newshape) * np.nan + crarr = np.concatenate((self.filled_data[:], extension), axis=axis) + extension[:] = 0 + mask = np.concatenate((self.mask.include(), extension), axis=axis) else: - crarr = myarr + crarr = self.filled_data[:] + mask = self.mask[:] def makeslice(startpoint,axis=axis,step=factor): # make empty slices - view = [slice(None) for ii in range(myarr.ndim)] + view = [slice(None) for ii in range(self.ndim)] # then fill the appropriate slice view[axis] = slice(startpoint,None,step) return view # The extra braces here are crucial: We're adding an extra dimension so we # can average across it - stacked_array = numpy.concatenate([[crarr[makeslice(ii)]] for ii in + stacked_array = np.concatenate([[crarr[makeslice(ii)]] for ii in range(factor)]) dsarr = estimator(stacked_array, axis=0) + stacked_mask = np.concatenate([[mask[makeslice(ii)]] for ii in + range(factor)]) + + mask = np.any(stacked_array, axis=0) + newwcs = self.wcs[makeslice(factor//2)] - return self._new_cube_with(data=dsarr, wcs=newwcs) + return self._new_cube_with(data=dsarr, wcs=newwcs, + mask=BooleanArrayMask(mask, wcs=newwcs)) class VaryingResolutionSpectralCube(BaseSpectralCube, MultiBeamMixinClass): """ diff --git a/spectral_cube/tests/test_regrid.py b/spectral_cube/tests/test_regrid.py index 5139b3b6d..be6d11e48 100644 --- a/spectral_cube/tests/test_regrid.py +++ b/spectral_cube/tests/test_regrid.py @@ -333,3 +333,24 @@ def test_nocelestial_reproject_2D_fail(): proj.reproject(cube.header) assert exc.value.args[0] == ("WCS does not contain two spatial axes.") + + +def test_downsample(): + cube, data = cube_and_raw('255.fits') + + dscube = cube.downsample_axis(factor=2, axis=0) + + expected = data.mean(axis=0) + + np.testing.assert_almost_equal(expected[None,:,:], + dscube.filled_data[:].value) + + dscube = cube.downsample_axis(factor=2, axis=1) + + expected = np.array([data[:,:2,:].mean(axis=1), + data[:,2:4,:].mean(axis=1), + data[:,4:,:].mean(axis=1), # just data[:,4,:] + ]).swapaxes(0,1) + + np.testing.assert_almost_equal(expected, + dscube.filled_data[:].value) From b75f06664931e04eadec4ac067e948a247cbba11 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Tue, 29 May 2018 17:54:49 +0300 Subject: [PATCH 3/7] add a truncation test --- spectral_cube/tests/test_regrid.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/spectral_cube/tests/test_regrid.py b/spectral_cube/tests/test_regrid.py index be6d11e48..d6b3b8199 100644 --- a/spectral_cube/tests/test_regrid.py +++ b/spectral_cube/tests/test_regrid.py @@ -354,3 +354,12 @@ def test_downsample(): np.testing.assert_almost_equal(expected, dscube.filled_data[:].value) + + dscube = cube.downsample_axis(factor=2, axis=1, truncate=True) + + expected = np.array([data[:,:2,:].mean(axis=1), + data[:,2:4,:].mean(axis=1), + ]).swapaxes(0,1) + + np.testing.assert_almost_equal(expected, + dscube.filled_data[:].value) From 4bcf66d3261de20aebcb46e9f52ec7501960cd03 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Tue, 29 May 2018 17:58:09 +0300 Subject: [PATCH 4/7] expanded docstring --- spectral_cube/spectral_cube.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py index a172ec4c9..217d784bf 100644 --- a/spectral_cube/spectral_cube.py +++ b/spectral_cube/spectral_cube.py @@ -2965,6 +2965,10 @@ def downsample_axis(self, factor, axis, estimator=np.nanmean, truncate=False): The WCS will be 'downsampled' by the specified factor as well. If the downsample factor is odd, there will be an offset in the WCS. + This implementation is in-memory only; it is therefore prohibitively + expensive for gigantic cubes. There is an open TODO item to implement + a slice-iterative version. + (original code came from image_tools) Parameters From dd8d31e3f3dffa9e090ad90c4590a238e2a69e3b Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 8 Jul 2018 22:29:37 +0200 Subject: [PATCH 5/7] add the memory-free version and tests --- spectral_cube/spectral_cube.py | 107 +++++++++++++++++++++-------- spectral_cube/tests/test_regrid.py | 11 +-- 2 files changed, 85 insertions(+), 33 deletions(-) diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py index 217d784bf..9aef6a9d1 100644 --- a/spectral_cube/spectral_cube.py +++ b/spectral_cube/spectral_cube.py @@ -2957,7 +2957,8 @@ def mask_channels(self, goodchannels): @warn_slow - def downsample_axis(self, factor, axis, estimator=np.nanmean, truncate=False): + def downsample_axis(self, factor, axis, estimator=np.nanmean, + truncate=False, use_memmap=True, progressbar=True): """ Downsample the cube by averaging over *factor* pixels along an axis. Crops right side if the shape is not a multiple of factor. @@ -2987,27 +2988,13 @@ def downsample_axis(self, factor, axis, estimator=np.nanmean, truncate=False): Whether to truncate the last chunk or average over a smaller number. e.g., if you downsample [1,2,3,4] by a factor of 3, you could get either [2] or [2,4] if truncate is True or False, respectively. + use_memmap : bool + Use a memory map on disk to avoid loading the whole cube into memory + (several times)? If set, the warning about large cubes can be ignored + (though you still have to override the warning) + progressbar : bool + Include a progress bar? Only works with ``use_memmap=True`` """ - # size of the dimension of interest - xs = self.shape[axis] - - if xs % int(factor) != 0: - if truncate: - view = [slice(None) for ii in range(self.ndim)] - view[axis] = slice(None,xs-(xs % int(factor))) - crarr = self.filled_data[view] - mask = self.mask[view].include() - else: - newshape = list(self.shape) - newshape[axis] = (factor - xs % int(factor)) - extension = np.empty(newshape) * np.nan - crarr = np.concatenate((self.filled_data[:], extension), axis=axis) - extension[:] = 0 - mask = np.concatenate((self.mask.include(), extension), axis=axis) - else: - crarr = self.filled_data[:] - mask = self.mask[:] - def makeslice(startpoint,axis=axis,step=factor): # make empty slices view = [slice(None) for ii in range(self.ndim)] @@ -3015,19 +3002,80 @@ def makeslice(startpoint,axis=axis,step=factor): view[axis] = slice(startpoint,None,step) return view - # The extra braces here are crucial: We're adding an extra dimension so we - # can average across it - stacked_array = np.concatenate([[crarr[makeslice(ii)]] for ii in + # size of the dimension of interest + xs = self.shape[axis] + + if not use_memmap: + if xs % int(factor) != 0: + if truncate: + view = [slice(None) for ii in range(self.ndim)] + view[axis] = slice(None,xs-(xs % int(factor))) + crarr = self.filled_data[view] + mask = self.mask[view].include() + else: + extension_shape = list(self.shape) + extension_shape[axis] = (factor - xs % int(factor)) + extension = np.empty(extension_shape) * np.nan + crarr = np.concatenate((self.filled_data[:], extension), axis=axis) + extension[:] = 0 + mask = np.concatenate((self.mask.include(), extension), axis=axis) + else: + crarr = self.filled_data[:] + mask = self.mask[:] + + # The extra braces here are crucial: We're adding an extra dimension so we + # can average across it + stacked_array = np.concatenate([[crarr[makeslice(ii)]] for ii in + range(factor)]) + + dsarr = estimator(stacked_array, axis=0) + + stacked_mask = np.concatenate([[mask[makeslice(ii)]] for ii in range(factor)]) - dsarr = estimator(stacked_array, axis=0) + mask = np.any(stacked_array, axis=0) + else: + def makeslice_local(startpoint,axis=axis,nsteps=factor): + # make empty slices + view = [slice(None) for ii in range(self.ndim)] + # then fill the appropriate slice + view[axis] = slice(startpoint,startpoint+nsteps,1) + return view + + newshape = list(self.shape) + newshape[axis] = (newshape[axis]//factor + (int(truncate) + * (xs % int(factor) != 0))) + newshape = tuple(newshape) + + if progressbar: + progressbar = ProgressBar + else: + progressbar = lambda x: x + + ntf = tempfile.NamedTemporaryFile() + dsarr = np.memmap(ntf, mode='w+', shape=newshape, dtype=np.float) + ntf2 = tempfile.NamedTemporaryFile() + mask = np.memmap(ntf2, mode='w+', shape=newshape, dtype=np.bool) + for ii in progressbar(range(newshape[axis])): + view_fulldata = makeslice_local(ii*factor) + view_newdata = makeslice_local(ii, nsteps=1) - stacked_mask = np.concatenate([[mask[makeslice(ii)]] for ii in - range(factor)]) + to_average = self.filled_data[view_fulldata] + to_anyfy = self.mask[view_fulldata].include() - mask = np.any(stacked_array, axis=0) + dsarr[view_newdata] = estimator(to_average, axis) + mask[view_newdata] = np.any(to_anyfy, axis).astype('bool') + + + view = makeslice(factor//2) + newwcs = wcs_utils.slice_wcs(self.wcs, view, shape=self.shape) + newwcs._naxis = list(self.shape) + + # this is an assertion to ensure that the WCS produced is valid + # (this is basically a regression test for #442) + assert newwcs[:, slice(None), slice(None)] + assert len(newwcs._naxis) == 3 - newwcs = self.wcs[makeslice(factor//2)] return self._new_cube_with(data=dsarr, wcs=newwcs, mask=BooleanArrayMask(mask, wcs=newwcs)) @@ -3213,6 +3261,7 @@ def __getitem__(self, view): # this is an assertion to ensure that the WCS produced is valid # (this is basically a regression test for #442) assert newwcs[:, slice(None), slice(None)] + assert len(newwcs._naxis) == 3 return self._new_cube_with(data=self._data[view], wcs=newwcs, diff --git a/spectral_cube/tests/test_regrid.py b/spectral_cube/tests/test_regrid.py index d6b3b8199..766f84ba1 100644 --- a/spectral_cube/tests/test_regrid.py +++ b/spectral_cube/tests/test_regrid.py @@ -335,27 +335,30 @@ def test_nocelestial_reproject_2D_fail(): assert exc.value.args[0] == ("WCS does not contain two spatial axes.") -def test_downsample(): +@pytest.mark.parametrize('use_memmap', (True,False)) +def test_downsample(use_memmap): cube, data = cube_and_raw('255.fits') - dscube = cube.downsample_axis(factor=2, axis=0) + dscube = cube.downsample_axis(factor=2, axis=0, use_memmap=use_memmap) expected = data.mean(axis=0) np.testing.assert_almost_equal(expected[None,:,:], dscube.filled_data[:].value) - dscube = cube.downsample_axis(factor=2, axis=1) + dscube = cube.downsample_axis(factor=2, axis=1, use_memmap=use_memmap) expected = np.array([data[:,:2,:].mean(axis=1), data[:,2:4,:].mean(axis=1), data[:,4:,:].mean(axis=1), # just data[:,4,:] ]).swapaxes(0,1) + assert expected.shape == (2,3,5) np.testing.assert_almost_equal(expected, dscube.filled_data[:].value) - dscube = cube.downsample_axis(factor=2, axis=1, truncate=True) + dscube = cube.downsample_axis(factor=2, axis=1, truncate=True, + use_memmap=use_memmap) expected = np.array([data[:,:2,:].mean(axis=1), data[:,2:4,:].mean(axis=1), From ba61895570dd9251a94b45be8ad6ea201866f9f4 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Thu, 12 Jul 2018 16:23:48 -0600 Subject: [PATCH 6/7] fix a logical error wrt truncation --- spectral_cube/spectral_cube.py | 12 ++++++++---- spectral_cube/tests/test_regrid.py | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py index 5ce6392d4..4025cdace 100644 --- a/spectral_cube/spectral_cube.py +++ b/spectral_cube/spectral_cube.py @@ -3040,7 +3040,7 @@ def makeslice(startpoint,axis=axis,step=factor): mask = np.any(stacked_array, axis=0) else: - def makeslice_local(startpoint,axis=axis,nsteps=factor): + def makeslice_local(startpoint, axis=axis, nsteps=factor): # make empty slices view = [slice(None) for ii in range(self.ndim)] # then fill the appropriate slice @@ -3048,7 +3048,7 @@ def makeslice_local(startpoint,axis=axis,nsteps=factor): return view newshape = list(self.shape) - newshape[axis] = (newshape[axis]//factor + (int(truncate) + newshape[axis] = (newshape[axis]//factor + ((1-int(truncate)) * (xs % int(factor) != 0))) newshape = tuple(newshape) @@ -3057,6 +3057,10 @@ def makeslice_local(startpoint,axis=axis,nsteps=factor): else: progressbar = lambda x: x + # Create a view that will add a blank newaxis at the right spot + view_newaxis = [slice(None) for ii in range(self.ndim)] + view_newaxis[axis] = None + ntf = tempfile.NamedTemporaryFile() dsarr = np.memmap(ntf, mode='w+', shape=newshape, dtype=np.float) ntf2 = tempfile.NamedTemporaryFile() @@ -3068,8 +3072,8 @@ def makeslice_local(startpoint,axis=axis,nsteps=factor): to_average = self.filled_data[view_fulldata] to_anyfy = self.mask[view_fulldata].include() - dsarr[view_newdata] = estimator(to_average, axis) - mask[view_newdata] = np.any(to_anyfy, axis).astype('bool') + dsarr[view_newdata] = estimator(to_average, axis)[view_newaxis] + mask[view_newdata] = np.any(to_anyfy, axis).astype('bool')[view_newaxis] view = makeslice(factor//2) diff --git a/spectral_cube/tests/test_regrid.py b/spectral_cube/tests/test_regrid.py index dc8bc06d3..c5f8933a6 100644 --- a/spectral_cube/tests/test_regrid.py +++ b/spectral_cube/tests/test_regrid.py @@ -342,6 +342,7 @@ def test_downsample(use_memmap): data[:,4:,:].mean(axis=1), # just data[:,4,:] ]).swapaxes(0,1) assert expected.shape == (2,3,5) + assert dscube.shape == (2,3,5) np.testing.assert_almost_equal(expected, dscube.filled_data[:].value) From 61c1524163c24355c588cecc06ca86c45feab9e3 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Thu, 12 Jul 2018 17:31:23 -0600 Subject: [PATCH 7/7] fix docstring --- spectral_cube/spectral_cube.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py index 4025cdace..0a4e2d229 100644 --- a/spectral_cube/spectral_cube.py +++ b/spectral_cube/spectral_cube.py @@ -2971,11 +2971,10 @@ def downsample_axis(self, factor, axis, estimator=np.nanmean, The WCS will be 'downsampled' by the specified factor as well. If the downsample factor is odd, there will be an offset in the WCS. - This implementation is in-memory only; it is therefore prohibitively - expensive for gigantic cubes. There is an open TODO item to implement - a slice-iterative version. - - (original code came from image_tools) + There is both an in-memory and a memory-mapped implementation; the + default is to use the memory-mapped version. Technically, the 'large + data' warning doesn't apply when using the memory-mapped version, but + the warning is still there anyway. Parameters ----------