From 01b3add10e8a4312b24dcf1a01097c2a1961d055 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" <keflavich@gmail.com>
Date: Tue, 29 May 2018 11:42:23 +0300
Subject: [PATCH 1/7] add downsample_axis code from image_tools.  Definitely
 need a test, since I don't really know what's going on with WCS yet

---
 spectral_cube/spectral_cube.py | 62 ++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py
index 99f1a66b4..8d2d5797b 100644
--- a/spectral_cube/spectral_cube.py
+++ b/spectral_cube/spectral_cube.py
@@ -2956,6 +2956,68 @@ def mask_channels(self, goodchannels):
         return self.with_mask(goodchannels[:,None,None])
 
 
+    @warn_slow
+    def downsample_axis(myarr, factor, axis, estimator=numpy.nanmean, truncate=False):
+        """
+        Downsample the cube by averaging over *factor* pixels along an axis.
+        Crops right side if the shape is not a multiple of factor.
+
+        The WCS will be 'downsampled' by the specified factor as well.
+        If the downsample factor is odd, there will be an offset in the WCS.
+
+        (original code came from image_tools)
+
+        Parameters
+        ----------
+        myarr : `~numpy.ndarray`
+            The array to downsample
+        factor : int
+            The factor to downsample by
+        axis : int
+            The axis to downsample along
+        estimator : function
+            defaults to mean.  You can downsample by summing or
+            something else if you want a different estimator
+            (e.g., downsampling error: you want to sum & divide by sqrt(n))
+        truncate : bool
+            Whether to truncate the last chunk or average over a smaller number.
+            e.g., if you downsample [1,2,3,4] by a factor of 3, you could get either
+            [2] or [2,4] if truncate is True or False, respectively.
+        """
+        # size of the dimension of interest
+        xs = myarr.shape[axis]
+        
+        if xs % int(factor) != 0:
+            if truncate:
+                view = [slice(None) for ii in range(myarr.ndim)]
+                view[axis] = slice(None,xs-(xs % int(factor)))
+                crarr = myarr[view]
+            else:
+                newshape = list(myarr.shape)
+                newshape[axis] = (factor - xs % int(factor))
+                extension = numpy.empty(newshape) * numpy.nan
+                crarr = numpy.concatenate((myarr,extension), axis=axis)
+        else:
+            crarr = myarr
+
+        def makeslice(startpoint,axis=axis,step=factor):
+            # make empty slices
+            view = [slice(None) for ii in range(myarr.ndim)]
+            # then fill the appropriate slice
+            view[axis] = slice(startpoint,None,step)
+            return view
+
+        # The extra braces here are crucial: We're adding an extra dimension so we
+        # can average across it
+        stacked_array = numpy.concatenate([[crarr[makeslice(ii)]] for ii in
+                                           range(factor)])
+
+        dsarr = estimator(stacked_array, axis=0)
+
+        newwcs = self.wcs[makeslice(factor//2)]
+
+        return self._new_cube_with(data=dsarr, wcs=newwcs)
+
 class VaryingResolutionSpectralCube(BaseSpectralCube, MultiBeamMixinClass):
     """
     A variant of the SpectralCube class that has PSF (beam) information on a

From 814c3b4aa32ba32cb8614dadcbfd4c847450309b Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" <keflavich@gmail.com>
Date: Tue, 29 May 2018 17:54:03 +0300
Subject: [PATCH 2/7] add a test and perform many corrections

---
 spectral_cube/spectral_cube.py     | 32 ++++++++++++++++++++----------
 spectral_cube/tests/test_regrid.py | 21 ++++++++++++++++++++
 2 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py
index 8d2d5797b..a172ec4c9 100644
--- a/spectral_cube/spectral_cube.py
+++ b/spectral_cube/spectral_cube.py
@@ -2957,7 +2957,7 @@ def mask_channels(self, goodchannels):
 
 
     @warn_slow
-    def downsample_axis(myarr, factor, axis, estimator=numpy.nanmean, truncate=False):
+    def downsample_axis(self, factor, axis, estimator=np.nanmean, truncate=False):
         """
         Downsample the cube by averaging over *factor* pixels along an axis.
         Crops right side if the shape is not a multiple of factor.
@@ -2985,38 +2985,48 @@ def downsample_axis(myarr, factor, axis, estimator=numpy.nanmean, truncate=False
             [2] or [2,4] if truncate is True or False, respectively.
         """
         # size of the dimension of interest
-        xs = myarr.shape[axis]
+        xs = self.shape[axis]
         
         if xs % int(factor) != 0:
             if truncate:
-                view = [slice(None) for ii in range(myarr.ndim)]
+                view = [slice(None) for ii in range(self.ndim)]
                 view[axis] = slice(None,xs-(xs % int(factor)))
-                crarr = myarr[view]
+                crarr = self.filled_data[view]
+                mask = self.mask[view].include()
             else:
-                newshape = list(myarr.shape)
+                newshape = list(self.shape)
                 newshape[axis] = (factor - xs % int(factor))
-                extension = numpy.empty(newshape) * numpy.nan
-                crarr = numpy.concatenate((myarr,extension), axis=axis)
+                extension = np.empty(newshape) * np.nan
+                crarr = np.concatenate((self.filled_data[:], extension), axis=axis)
+                extension[:] = 0
+                mask = np.concatenate((self.mask.include(), extension), axis=axis)
         else:
-            crarr = myarr
+            crarr = self.filled_data[:]
+            mask = self.mask[:]
 
         def makeslice(startpoint,axis=axis,step=factor):
             # make empty slices
-            view = [slice(None) for ii in range(myarr.ndim)]
+            view = [slice(None) for ii in range(self.ndim)]
             # then fill the appropriate slice
             view[axis] = slice(startpoint,None,step)
             return view
 
         # The extra braces here are crucial: We're adding an extra dimension so we
         # can average across it
-        stacked_array = numpy.concatenate([[crarr[makeslice(ii)]] for ii in
+        stacked_array = np.concatenate([[crarr[makeslice(ii)]] for ii in
                                            range(factor)])
 
         dsarr = estimator(stacked_array, axis=0)
 
+        stacked_mask = np.concatenate([[mask[makeslice(ii)]] for ii in
+                                       range(factor)])
+
+        mask = np.any(stacked_array, axis=0)
+
         newwcs = self.wcs[makeslice(factor//2)]
 
-        return self._new_cube_with(data=dsarr, wcs=newwcs)
+        return self._new_cube_with(data=dsarr, wcs=newwcs,
+                                   mask=BooleanArrayMask(mask, wcs=newwcs))
 
 class VaryingResolutionSpectralCube(BaseSpectralCube, MultiBeamMixinClass):
     """
diff --git a/spectral_cube/tests/test_regrid.py b/spectral_cube/tests/test_regrid.py
index 5139b3b6d..be6d11e48 100644
--- a/spectral_cube/tests/test_regrid.py
+++ b/spectral_cube/tests/test_regrid.py
@@ -333,3 +333,24 @@ def test_nocelestial_reproject_2D_fail():
         proj.reproject(cube.header)
 
     assert exc.value.args[0] == ("WCS does not contain two spatial axes.")
+
+
+def test_downsample():
+    cube, data = cube_and_raw('255.fits')
+
+    dscube = cube.downsample_axis(factor=2, axis=0)
+
+    expected = data.mean(axis=0)
+
+    np.testing.assert_almost_equal(expected[None,:,:],
+                                   dscube.filled_data[:].value)
+
+    dscube = cube.downsample_axis(factor=2, axis=1)
+
+    expected = np.array([data[:,:2,:].mean(axis=1),
+                         data[:,2:4,:].mean(axis=1),
+                         data[:,4:,:].mean(axis=1), # just data[:,4,:]
+                        ]).swapaxes(0,1)
+
+    np.testing.assert_almost_equal(expected,
+                                   dscube.filled_data[:].value)

From b75f06664931e04eadec4ac067e948a247cbba11 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" <keflavich@gmail.com>
Date: Tue, 29 May 2018 17:54:49 +0300
Subject: [PATCH 3/7] add a truncation test

---
 spectral_cube/tests/test_regrid.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/spectral_cube/tests/test_regrid.py b/spectral_cube/tests/test_regrid.py
index be6d11e48..d6b3b8199 100644
--- a/spectral_cube/tests/test_regrid.py
+++ b/spectral_cube/tests/test_regrid.py
@@ -354,3 +354,12 @@ def test_downsample():
 
     np.testing.assert_almost_equal(expected,
                                    dscube.filled_data[:].value)
+
+    dscube = cube.downsample_axis(factor=2, axis=1, truncate=True)
+
+    expected = np.array([data[:,:2,:].mean(axis=1),
+                         data[:,2:4,:].mean(axis=1),
+                        ]).swapaxes(0,1)
+
+    np.testing.assert_almost_equal(expected,
+                                   dscube.filled_data[:].value)

From 4bcf66d3261de20aebcb46e9f52ec7501960cd03 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" <keflavich@gmail.com>
Date: Tue, 29 May 2018 17:58:09 +0300
Subject: [PATCH 4/7] expanded docstring

---
 spectral_cube/spectral_cube.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py
index a172ec4c9..217d784bf 100644
--- a/spectral_cube/spectral_cube.py
+++ b/spectral_cube/spectral_cube.py
@@ -2965,6 +2965,10 @@ def downsample_axis(self, factor, axis, estimator=np.nanmean, truncate=False):
         The WCS will be 'downsampled' by the specified factor as well.
         If the downsample factor is odd, there will be an offset in the WCS.
 
+        This implementation is in-memory only; it is therefore prohibitively
+        expensive for gigantic cubes.  There is an open TODO item to implement
+        a slice-iterative version.
+
         (original code came from image_tools)
 
         Parameters

From dd8d31e3f3dffa9e090ad90c4590a238e2a69e3b Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" <keflavich@gmail.com>
Date: Sun, 8 Jul 2018 22:29:37 +0200
Subject: [PATCH 5/7] add the memory-free version and tests

---
 spectral_cube/spectral_cube.py     | 107 +++++++++++++++++++++--------
 spectral_cube/tests/test_regrid.py |  11 +--
 2 files changed, 85 insertions(+), 33 deletions(-)

diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py
index 217d784bf..9aef6a9d1 100644
--- a/spectral_cube/spectral_cube.py
+++ b/spectral_cube/spectral_cube.py
@@ -2957,7 +2957,8 @@ def mask_channels(self, goodchannels):
 
 
     @warn_slow
-    def downsample_axis(self, factor, axis, estimator=np.nanmean, truncate=False):
+    def downsample_axis(self, factor, axis, estimator=np.nanmean,
+                        truncate=False, use_memmap=True, progressbar=True):
         """
         Downsample the cube by averaging over *factor* pixels along an axis.
         Crops right side if the shape is not a multiple of factor.
@@ -2987,27 +2988,13 @@ def downsample_axis(self, factor, axis, estimator=np.nanmean, truncate=False):
             Whether to truncate the last chunk or average over a smaller number.
             e.g., if you downsample [1,2,3,4] by a factor of 3, you could get either
             [2] or [2,4] if truncate is True or False, respectively.
+        use_memmap : bool
+            Use a memory map on disk to avoid loading the whole cube into memory
+            (several times)?  If set, the warning about large cubes can be ignored
+            (though you still have to override the warning)
+        progressbar : bool
+            Include a progress bar?  Only works with ``use_memmap=True``
         """
-        # size of the dimension of interest
-        xs = self.shape[axis]
-        
-        if xs % int(factor) != 0:
-            if truncate:
-                view = [slice(None) for ii in range(self.ndim)]
-                view[axis] = slice(None,xs-(xs % int(factor)))
-                crarr = self.filled_data[view]
-                mask = self.mask[view].include()
-            else:
-                newshape = list(self.shape)
-                newshape[axis] = (factor - xs % int(factor))
-                extension = np.empty(newshape) * np.nan
-                crarr = np.concatenate((self.filled_data[:], extension), axis=axis)
-                extension[:] = 0
-                mask = np.concatenate((self.mask.include(), extension), axis=axis)
-        else:
-            crarr = self.filled_data[:]
-            mask = self.mask[:]
-
         def makeslice(startpoint,axis=axis,step=factor):
             # make empty slices
             view = [slice(None) for ii in range(self.ndim)]
@@ -3015,19 +3002,80 @@ def makeslice(startpoint,axis=axis,step=factor):
             view[axis] = slice(startpoint,None,step)
             return view
 
-        # The extra braces here are crucial: We're adding an extra dimension so we
-        # can average across it
-        stacked_array = np.concatenate([[crarr[makeslice(ii)]] for ii in
+        # size of the dimension of interest
+        xs = self.shape[axis]
+
+        if not use_memmap:
+            if xs % int(factor) != 0:
+                if truncate:
+                    view = [slice(None) for ii in range(self.ndim)]
+                    view[axis] = slice(None,xs-(xs % int(factor)))
+                    crarr = self.filled_data[view]
+                    mask = self.mask[view].include()
+                else:
+                    extension_shape = list(self.shape)
+                    extension_shape[axis] = (factor - xs % int(factor))
+                    extension = np.empty(extension_shape) * np.nan
+                    crarr = np.concatenate((self.filled_data[:], extension), axis=axis)
+                    extension[:] = 0
+                    mask = np.concatenate((self.mask.include(), extension), axis=axis)
+            else:
+                crarr = self.filled_data[:]
+                mask = self.mask[:]
+
+            # The extra braces here are crucial: We're adding an extra dimension so we
+            # can average across it
+            stacked_array = np.concatenate([[crarr[makeslice(ii)]] for ii in
+                                               range(factor)])
+
+            dsarr = estimator(stacked_array, axis=0)
+
+            stacked_mask = np.concatenate([[mask[makeslice(ii)]] for ii in
                                            range(factor)])
 
-        dsarr = estimator(stacked_array, axis=0)
+            mask = np.any(stacked_array, axis=0)
+        else:
+            def makeslice_local(startpoint,axis=axis,nsteps=factor):
+                # make empty slices
+                view = [slice(None) for ii in range(self.ndim)]
+                # then fill the appropriate slice
+                view[axis] = slice(startpoint,startpoint+nsteps,1)
+                return view
+
+            newshape = list(self.shape)
+            newshape[axis] = (newshape[axis]//factor + (int(truncate)
+                                                        * (xs % int(factor) != 0)))
+            newshape = tuple(newshape)
+
+            if progressbar:
+                progressbar = ProgressBar
+            else:
+                progressbar = lambda x: x
+
+            ntf = tempfile.NamedTemporaryFile()
+            dsarr = np.memmap(ntf, mode='w+', shape=newshape, dtype=np.float)
+            ntf2 = tempfile.NamedTemporaryFile()
+            mask = np.memmap(ntf2, mode='w+', shape=newshape, dtype=np.bool)
+            for ii in progressbar(range(newshape[axis])):
+                view_fulldata = makeslice_local(ii*factor)
+                view_newdata = makeslice_local(ii, nsteps=1)
 
-        stacked_mask = np.concatenate([[mask[makeslice(ii)]] for ii in
-                                       range(factor)])
+                to_average = self.filled_data[view_fulldata]
+                to_anyfy = self.mask[view_fulldata].include()
 
-        mask = np.any(stacked_array, axis=0)
+                dsarr[view_newdata] = estimator(to_average, axis)
+                mask[view_newdata] = np.any(to_anyfy, axis).astype('bool')
+
+
+        view = makeslice(factor//2)
+        newwcs = wcs_utils.slice_wcs(self.wcs, view, shape=self.shape)
+        newwcs._naxis = list(self.shape)
+
+        # this is an assertion to ensure that the WCS produced is valid
+        # (this is basically a regression test for #442)
+        assert newwcs[:, slice(None), slice(None)]
+        assert len(newwcs._naxis) == 3
 
-        newwcs = self.wcs[makeslice(factor//2)]
 
         return self._new_cube_with(data=dsarr, wcs=newwcs,
                                    mask=BooleanArrayMask(mask, wcs=newwcs))
@@ -3213,6 +3261,7 @@ def __getitem__(self, view):
         # this is an assertion to ensure that the WCS produced is valid
         # (this is basically a regression test for #442)
         assert newwcs[:, slice(None), slice(None)]
+        assert len(newwcs._naxis) == 3
 
         return self._new_cube_with(data=self._data[view],
                                    wcs=newwcs,
diff --git a/spectral_cube/tests/test_regrid.py b/spectral_cube/tests/test_regrid.py
index d6b3b8199..766f84ba1 100644
--- a/spectral_cube/tests/test_regrid.py
+++ b/spectral_cube/tests/test_regrid.py
@@ -335,27 +335,30 @@ def test_nocelestial_reproject_2D_fail():
     assert exc.value.args[0] == ("WCS does not contain two spatial axes.")
 
 
-def test_downsample():
+@pytest.mark.parametrize('use_memmap', (True,False))
+def test_downsample(use_memmap):
     cube, data = cube_and_raw('255.fits')
 
-    dscube = cube.downsample_axis(factor=2, axis=0)
+    dscube = cube.downsample_axis(factor=2, axis=0, use_memmap=use_memmap)
 
     expected = data.mean(axis=0)
 
     np.testing.assert_almost_equal(expected[None,:,:],
                                    dscube.filled_data[:].value)
 
-    dscube = cube.downsample_axis(factor=2, axis=1)
+    dscube = cube.downsample_axis(factor=2, axis=1, use_memmap=use_memmap)
 
     expected = np.array([data[:,:2,:].mean(axis=1),
                          data[:,2:4,:].mean(axis=1),
                          data[:,4:,:].mean(axis=1), # just data[:,4,:]
                         ]).swapaxes(0,1)
+    assert expected.shape == (2,3,5)
 
     np.testing.assert_almost_equal(expected,
                                    dscube.filled_data[:].value)
 
-    dscube = cube.downsample_axis(factor=2, axis=1, truncate=True)
+    dscube = cube.downsample_axis(factor=2, axis=1, truncate=True,
+                                  use_memmap=use_memmap)
 
     expected = np.array([data[:,:2,:].mean(axis=1),
                          data[:,2:4,:].mean(axis=1),

From ba61895570dd9251a94b45be8ad6ea201866f9f4 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" <keflavich@gmail.com>
Date: Thu, 12 Jul 2018 16:23:48 -0600
Subject: [PATCH 6/7] fix a logical error wrt truncation

---
 spectral_cube/spectral_cube.py     | 12 ++++++++----
 spectral_cube/tests/test_regrid.py |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py
index 5ce6392d4..4025cdace 100644
--- a/spectral_cube/spectral_cube.py
+++ b/spectral_cube/spectral_cube.py
@@ -3040,7 +3040,7 @@ def makeslice(startpoint,axis=axis,step=factor):
 
             mask = np.any(stacked_array, axis=0)
         else:
-            def makeslice_local(startpoint,axis=axis,nsteps=factor):
+            def makeslice_local(startpoint, axis=axis, nsteps=factor):
                 # make empty slices
                 view = [slice(None) for ii in range(self.ndim)]
                 # then fill the appropriate slice
@@ -3048,7 +3048,7 @@ def makeslice_local(startpoint,axis=axis,nsteps=factor):
                 return view
 
             newshape = list(self.shape)
-            newshape[axis] = (newshape[axis]//factor + (int(truncate)
+            newshape[axis] = (newshape[axis]//factor + ((1-int(truncate))
                                                         * (xs % int(factor) != 0)))
             newshape = tuple(newshape)
 
@@ -3057,6 +3057,10 @@ def makeslice_local(startpoint,axis=axis,nsteps=factor):
             else:
                 progressbar = lambda x: x
 
+            # Create a view that will add a blank newaxis at the right spot
+            view_newaxis = [slice(None) for ii in range(self.ndim)]
+            view_newaxis[axis] = None
+
             ntf = tempfile.NamedTemporaryFile()
             dsarr = np.memmap(ntf, mode='w+', shape=newshape, dtype=np.float)
             ntf2 = tempfile.NamedTemporaryFile()
@@ -3068,8 +3072,8 @@ def makeslice_local(startpoint,axis=axis,nsteps=factor):
                 to_average = self.filled_data[view_fulldata]
                 to_anyfy = self.mask[view_fulldata].include()
 
-                dsarr[view_newdata] = estimator(to_average, axis)
-                mask[view_newdata] = np.any(to_anyfy, axis).astype('bool')
+                dsarr[view_newdata] = estimator(to_average, axis)[view_newaxis]
+                mask[view_newdata] = np.any(to_anyfy, axis).astype('bool')[view_newaxis]
 
 
         view = makeslice(factor//2)
diff --git a/spectral_cube/tests/test_regrid.py b/spectral_cube/tests/test_regrid.py
index dc8bc06d3..c5f8933a6 100644
--- a/spectral_cube/tests/test_regrid.py
+++ b/spectral_cube/tests/test_regrid.py
@@ -342,6 +342,7 @@ def test_downsample(use_memmap):
                          data[:,4:,:].mean(axis=1), # just data[:,4,:]
                         ]).swapaxes(0,1)
     assert expected.shape == (2,3,5)
+    assert dscube.shape == (2,3,5)
 
     np.testing.assert_almost_equal(expected,
                                    dscube.filled_data[:].value)

From 61c1524163c24355c588cecc06ca86c45feab9e3 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" <keflavich@gmail.com>
Date: Thu, 12 Jul 2018 17:31:23 -0600
Subject: [PATCH 7/7] fix docstring

---
 spectral_cube/spectral_cube.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/spectral_cube/spectral_cube.py b/spectral_cube/spectral_cube.py
index 4025cdace..0a4e2d229 100644
--- a/spectral_cube/spectral_cube.py
+++ b/spectral_cube/spectral_cube.py
@@ -2971,11 +2971,10 @@ def downsample_axis(self, factor, axis, estimator=np.nanmean,
         The WCS will be 'downsampled' by the specified factor as well.
         If the downsample factor is odd, there will be an offset in the WCS.
 
-        This implementation is in-memory only; it is therefore prohibitively
-        expensive for gigantic cubes.  There is an open TODO item to implement
-        a slice-iterative version.
-
-        (original code came from image_tools)
+        There is both an in-memory and a memory-mapped implementation; the
+        default is to use the memory-mapped version.  Technically, the 'large
+        data' warning doesn't apply when using the memory-mapped version, but
+        the warning is still there anyway.
 
         Parameters
         ----------