added rect mode to _fit_basis_2d and unittest.

HERA-Team · Aug 5, 2020 · 06e14b5 · 06e14b5
1 parent 45103a4
commit 06e14b5
Show file tree

Hide file tree

Showing 2 changed files with 135 additions and 50 deletions.
diff --git a/uvtools/dspec.py b/uvtools/dspec.py
@@ -21,10 +21,12 @@
                  'max_contiguous_edge_flags' : 10}
 DPSS_DEFAULTS_1D = {'suppression_factors' :  [1e-9],
                'eigenval_cutoff' : [1e-12],
-               'max_contiguous_edge_flags' : 10}
+               'max_contiguous_edge_flags' : 10,
+               'filt2d_mode' : 'outer'}
 DFT_DEFAULTS_1D = {'suppression_factors' : [1e-9],
                 'fundamental_period' : np.nan,
-                'max_contiguous_edge_flags' : 10}
+                'max_contiguous_edge_flags' : 10,
+                'filt2d_mode' : 'outer'}
 
 CLEAN_DEFAULTS_2D = {'tol':1e-9, 'window': ['none', 'none'],
  'alpha':.5, 'maxiter':100, 'gain':0.1,
@@ -34,10 +36,12 @@
                  'max_contiguous_edge_flags' : 10}
 DPSS_DEFAULTS_2D = {'suppression_factors' : [[1e-9], [1e-9]],
                'eigenval_cutoff' : [[1e-12], [1e-12]],
-               'max_contiguous_edge_flags' : 10}
+               'max_contiguous_edge_flags' : 10,
+               'filt2d_mode' : 'outer'}
 DFT_DEFAULTS_2D = {'suppression_factors' : [[1e-9], [1e-9]],
                 'fundamental_period' : [np.nan, np.nan],
-                'max_contiguous_edge_flags' : 10}
+                'max_contiguous_edge_flags' : 10,
+                'filt2d_mode' : 'outer'}
 
 
 def _process_filter_kwargs(kwarg_dict, default_dict):
@@ -139,7 +143,11 @@ def _fourier_filter_hash(filter_centers, filter_half_widths,
     + ('filter_half_widths x N x DF:',) + tuple(np.round(np.asarray(filter_half_widths) * np.mean(np.diff(x)) * len(x), hash_decimal))\
     + ('filter_factors x 1e9:',) + tuple(np.round(np.asarray(filter_factors) * 1e9, hash_decimal))
     if w is not None:
-        filter_key = filter_key + ('weights', ) +  tuple(np.round(w.tolist(), hash_decimal))
+        # hash binary flags as booleans
+        if np.all((w == 0) | (w == 1)):
+            filter_key = filter_key + ('weights', ) +  tuple(w.astype(bool).tolist())
+        else:
+            filter_key = filter_key + ('weights', ) +  tuple(np.round(w.tolist(), hash_decimal))
     filter_key = filter_key + tuple([kwargs[k] for k in kwargs])
     return filter_key
 
@@ -452,10 +460,11 @@ def fourier_filter(x, data, wgts, filter_centers, filter_half_widths, mode,
                                                      max_contiguous_edge_flags=max_contiguous_edge_flags)
                        model = data - residual
                        if len(mode) > 1:
+                           filt2d_mode = filter_kwargs.pop('filt2d_mode')
                            model, _, info_deconv = _fit_basis_2d(x=x, data=model, filter_centers=filter_centers, filter_dims=filter_dims_d,
                                                                  skip_wgt=skip_wgt, basis=mode[1], method=mode[2], wgts=wgts, basis_options=filter_kwargs,
                                                                  filter_half_widths=filter_half_widths, suppression_factors=suppression_factors,
-                                                                 cache=cache, max_contiguous_edge_flags=max_contiguous_edge_flags)
+                                                                 cache=cache, max_contiguous_edge_flags=max_contiguous_edge_flags, filt2d_mode=filt2d_mode)
                            info['info_deconv']=info_deconv
 
                    elif mode[0] in ['dft', 'dpss']:
@@ -468,6 +477,7 @@ def fourier_filter(x, data, wgts, filter_centers, filter_half_widths, mode,
                            filter_dims_d = [1]
                        suppression_factors = filter_kwargs.pop('suppression_factors')
                        max_contiguous_edge_flags = filter_kwargs.pop('max_contiguous_edge_flags')
+                       filt2d_mode = filter_kwargs.pop('filt2d_mode')
                        #if filter2d is True, create fitting_options that is a 2-list for 0 and 1 dimension
                        model, residual, info = _fit_basis_2d(x=x, data=data, filter_centers=filter_centers, filter_dims=filter_dims_d,
                                                            skip_wgt=skip_wgt, basis=mode[0], method=mode[1], wgts=wgts, basis_options=filter_kwargs,
@@ -1737,7 +1747,7 @@ def _clean_filter(x, data, wgts, filter_centers, filter_half_widths,
 
 def _fit_basis_2d(x, data, wgts, filter_centers, filter_half_widths,
                 basis_options, suppression_factors=None,
-                method='leastsq', basis='dft', cache=None,
+                method='leastsq', basis='dft', cache=None, filt2d_mode='outer',
                 filter_dims = 1, skip_wgt=0.1, max_contiguous_edge_flags=5):
     """
     A 1d linear-least-squares fitting function for computing models and residuals for fitting of the form
@@ -1811,6 +1821,12 @@ def _fit_basis_2d(x, data, wgts, filter_centers, filter_half_widths,
         specify dimension to filter. default 1,
         and if 2d filter, will use both dimensions.
 
+    filt2d_mode, string optional
+        can be in {"rect", "outer"}. If "rect" treat each filter center and filter width in time as defining the bounds
+        of a separate rectangle that is independently fitted and subtracted from the data in Fourier space. If "outer"
+        then remove all combinations of rectangular regions that arise from the provided list of time and frequency
+        regions. Default is "outer".
+
     skip_wgt: skips filtering rows with very low total weight (unflagged fraction ~< skip_wgt).
         Model is left as 0s, residual is left as data, and info is {'skipped': True} for that
         time. Only works properly when all weights are all between 0 and 1.
@@ -1865,51 +1881,107 @@ def _fit_basis_2d(x, data, wgts, filter_centers, filter_half_widths,
         basis_options = [{k:basis_options[k][0] for k in basis_options}, {k:basis_options[k][1] for k in basis_options}]
     #filter -1 dimension
     model = np.zeros_like(data)
-    for i, _y, _w, in zip(range(data.shape[0]), data, wgts):
-        if np.count_nonzero(_w)/len(_w) >= skip_wgt and np.count_nonzero(_w[:max_contiguous_edge_flags]) > 0 \
-                                                        and np.count_nonzero(_w[-max_contiguous_edge_flags:]) >0:
-            model[i], _, info_t = _fit_basis_1d(x=x[1], y=_y, w=_w, filter_centers=filter_centers[1],
-                                            filter_half_widths=filter_half_widths[1],
-                                            suppression_factors=suppression_factors[1],
-                                            basis_options=basis_options[1], method=method,
-                                            basis=basis, cache=cache)
-            info['status']['axis_1'][i] = 'success'
-        else:
-            info['status']['axis_1'][i] = 'skipped'
-    #and if filter2d, filter the 0 dimension. Note that we feed in the 'model'
-    #set wgts for time filtering to happen on skipped rows
-    info['filter_params'] = {'axis_0':{}, 'axis_1':{}}
-    if np.any([info['status']['axis_1'][i] == 'success' for i in info['status']['axis_1']]):
-        info['filter_params']['axis_1']['method'] = info_t['method']
-        info['filter_params']['axis_1']['basis'] = info_t['basis']
-        info['filter_params']['axis_1']['filter_centers'] = info_t['filter_centers']
-        info['filter_params']['axis_1']['filter_half_widths'] = info_t['filter_half_widths']
-        info['filter_params']['axis_1']['suppression_factors'] = info_t['suppression_factors']
-        info['filter_params']['axis_1']['basis_options'] = info_t['basis_options']
-        info['filter_params']['axis_1']['mode'] = info_t['basis'] + '_' + method
-    if filter2d:
-        wgts_time = np.ones_like(wgts)
-        for i in range(data.shape[0]):
-            if info['status']['axis_1'][i] == 'skipped':
-                wgts_time[i] = 0.
-        for i, _y, _w, in zip(range(model.shape[1]), model.T, wgts_time.T):
+    if filt2d_mode == 'outer' or not filter2d:
+        for i, _y, _w, in zip(range(data.shape[0]), data, wgts):
             if np.count_nonzero(_w)/len(_w) >= skip_wgt and np.count_nonzero(_w[:max_contiguous_edge_flags]) > 0 \
-               and np.count_nonzero(_w[-max_contiguous_edge_flags:]) >0:
-                model.T[i], _, info_t = _fit_basis_1d(x=x[0], y=_y, w=_w, filter_centers=filter_centers[0],
-                                                                 filter_half_widths=filter_half_widths[0],
-                                                                 suppression_factors=suppression_factors[0],
-                                                                 basis_options=basis_options[0], method=method,
-                                                                 basis=basis, cache=cache)
-                info['status']['axis_0'][i] = 'success'
+                                                            and np.count_nonzero(_w[-max_contiguous_edge_flags:]) >0:
+                model[i], _, info_t = _fit_basis_1d(x=x[1], y=_y, w=_w, filter_centers=filter_centers[1],
+                                                filter_half_widths=filter_half_widths[1],
+                                                suppression_factors=suppression_factors[1],
+                                                basis_options=basis_options[1], method=method,
+                                                basis=basis, cache=cache)
+                info['status']['axis_1'][i] = 'success'
             else:
-                info['status']['axis_0'][i] = 'skipped'
-        if np.any([info['status']['axis_0'][i] == 'success' for i in info['status']['axis_0']]):
-            info['filter_params']['axis_0']['method'] = info_t['method']
-            info['filter_params']['axis_0']['basis'] = info_t['basis']
-            info['filter_params']['axis_0']['filter_centers'] = info_t['filter_centers']
-            info['filter_params']['axis_0']['filter_half_widths'] = info_t['filter_half_widths']
-            info['filter_params']['axis_0']['suppression_factors'] = info_t['suppression_factors']
-            info['filter_params']['axis_0']['basis_options'] = info_t['basis_options']
+                info['status']['axis_1'][i] = 'skipped'
+        #and if filter2d, filter the 0 dimension. Note that we feed in the 'model'
+        #set wgts for time filtering to happen on skipped rows
+        info['filter_params'] = {'axis_0':{}, 'axis_1':{}}
+        if np.any([info['status']['axis_1'][i] == 'success' for i in info['status']['axis_1']]):
+            info['filter_params']['axis_1']['method'] = info_t['method']
+            info['filter_params']['axis_1']['basis'] = info_t['basis']
+            info['filter_params']['axis_1']['filter_centers'] = info_t['filter_centers']
+            info['filter_params']['axis_1']['filter_half_widths'] = info_t['filter_half_widths']
+            info['filter_params']['axis_1']['suppression_factors'] = info_t['suppression_factors']
+            info['filter_params']['axis_1']['basis_options'] = info_t['basis_options']
+            info['filter_params']['axis_1']['mode'] = info_t['basis'] + '_' + method
+        if filter2d:
+            wgts_time = np.ones_like(wgts)
+            for i in range(data.shape[0]):
+                if info['status']['axis_1'][i] == 'skipped':
+                    wgts_time[i] = 0.
+            for i, _y, _w, in zip(range(model.shape[1]), model.T, wgts_time.T):
+                if np.count_nonzero(_w)/len(_w) >= skip_wgt and np.count_nonzero(_w[:max_contiguous_edge_flags]) > 0 \
+                   and np.count_nonzero(_w[-max_contiguous_edge_flags:]) >0:
+                    model.T[i], _, info_t = _fit_basis_1d(x=x[0], y=_y, w=_w, filter_centers=filter_centers[0],
+                                                                     filter_half_widths=filter_half_widths[0],
+                                                                     suppression_factors=suppression_factors[0],
+                                                                     basis_options=basis_options[0], method=method,
+                                                                     basis=basis, cache=cache)
+                    info['status']['axis_0'][i] = 'success'
+                else:
+                    info['status']['axis_0'][i] = 'skipped'
+            if np.any([info['status']['axis_0'][i] == 'success' for i in info['status']['axis_0']]):
+                info['filter_params']['axis_0']['method'] = info_t['method']
+                info['filter_params']['axis_0']['basis'] = info_t['basis']
+                info['filter_params']['axis_0']['filter_centers'] = info_t['filter_centers']
+                info['filter_params']['axis_0']['filter_half_widths'] = info_t['filter_half_widths']
+                info['filter_params']['axis_0']['suppression_factors'] = info_t['suppression_factors']
+                info['filter_params']['axis_0']['basis_options'] = info_t['basis_options']
+    elif filt2d_mode == 'rect':
+        residual = copy.deepcopy(data)
+        model = np.zeros_like(data)
+        # if filt2d_mode is rect, then iterate through each pair of freq-time rectangular bounds
+        info['filter_params'] = {'axis_0':{'round%d'%r:{} for r in range(len(filter_centers[0]))}, 'axis_1':{'round%d'%r:{} for r in range(len(filter_centers[0]))}}
+        for stepnum, fc_t, fw_t, sf_t, fc_f, fw_f, sf_f in zip(range(len(filter_centers[0])), filter_centers[0], filter_half_widths[0], suppression_factors[0], filter_centers[1], filter_half_widths[1], suppression_factors[1]):
+            # construct model in frequency direction.
+            model_temp = np.zeros_like(data)
+            for i, _y, _w, in zip(range(data.shape[0]), residual, wgts):
+                if np.count_nonzero(_w)/len(_w) >= skip_wgt and np.count_nonzero(_w[:max_contiguous_edge_flags]) > 0 \
+                                                                and np.count_nonzero(_w[-max_contiguous_edge_flags:]) >0:
+                    model_temp[i], _, info_t = _fit_basis_1d(x=x[1], y=_y, w=_w, filter_centers=[fc_f],
+                                                    filter_half_widths=[fw_f],
+                                                    suppression_factors=[sf_f],
+                                                    basis_options=basis_options[1], method=method,
+                                                    basis=basis, cache=cache)
+                    info['status']['axis_1'][i] = 'success'
+                else:
+                    info['status']['axis_1'][i] = 'skipped'
+                if np.any([info['status']['axis_1'][i] == 'success' for i in info['status']['axis_1']]):
+                    info['filter_params']['axis_1']['round%d'%stepnum]['method'] = info_t['method']
+                    info['filter_params']['axis_1']['round%d'%stepnum]['basis'] = info_t['basis']
+                    info['filter_params']['axis_1']['round%d'%stepnum]['filter_centers'] = info_t['filter_centers']
+                    info['filter_params']['axis_1']['round%d'%stepnum]['filter_half_widths'] = info_t['filter_half_widths']
+                    info['filter_params']['axis_1']['round%d'%stepnum]['suppression_factors'] = info_t['suppression_factors']
+                    info['filter_params']['axis_1']['round%d'%stepnum]['basis_options'] = info_t['basis_options']
+                    info['filter_params']['axis_1']['round%d'%stepnum]['mode'] = info_t['basis'] + '_' + method
+            wgts_time = np.ones_like(wgts)
+            for i in range(data.shape[0]):
+                if info['status']['axis_1'][i] == 'skipped':
+                    wgts_time[i] = 0.
+            # restrict its time evolution in the time-direction to be consisting of time-basis functions.
+            for i, _y, _w, in zip(range(model.shape[1]), model_temp.T, wgts_time.T):
+                if np.count_nonzero(_w)/len(_w) >= skip_wgt and np.count_nonzero(_w[:max_contiguous_edge_flags]) > 0 \
+                   and np.count_nonzero(_w[-max_contiguous_edge_flags:]) >0:
+                    model_temp.T[i], _, info_t = _fit_basis_1d(x=x[0], y=_y, w=_w, filter_centers=[fc_t],
+                                                                     filter_half_widths=[fw_t],
+                                                                     suppression_factors=[sf_t],
+                                                                     basis_options=basis_options[0], method=method,
+                                                                     basis=basis, cache=cache)
+                    info['status']['axis_0'][i] = 'success'
+                else:
+                    info['status']['axis_0'][i] = 'skipped'
+                if np.any([info['status']['axis_0'][i] == 'success' for i in info['status']['axis_0']]):
+                    info['filter_params']['axis_0']['round%d'%stepnum]['method'] = info_t['method']
+                    info['filter_params']['axis_0']['round%d'%stepnum]['basis'] = info_t['basis']
+                    info['filter_params']['axis_0']['round%d'%stepnum]['filter_centers'] = info_t['filter_centers']
+                    info['filter_params']['axis_0']['round%d'%stepnum]['filter_half_widths'] = info_t['filter_half_widths']
+                    info['filter_params']['axis_0']['round%d'%stepnum]['suppression_factors'] = info_t['suppression_factors']
+                    info['filter_params']['axis_0']['round%d'%stepnum]['basis_options'] = info_t['basis_options']
+            # and subtract it (effectively subtracting rectangular Fourier model)
+            residual = (residual - model_temp) * (np.abs(wgts) > 0.).astype(float)
+            # and add the model to the overall model.
+            model += model_temp
+            # repeate this for all the rectangle bounds provided.
 
     residual = (data - model) * (np.abs(wgts) > 0).astype(float)
     #this will only happen if filter_dims is only zero!

diff --git a/uvtools/tests/test_dspec.py b/uvtools/tests/test_dspec.py
@@ -890,6 +890,19 @@ def get_snr(clean, fftax=1, avgax=0, modes=[2, 20]):
     nt.assert_raises(ValueError, dspec.fourier_filter,x=flog, data=d, wgts=w, filter_centers=[0.],
                                                   filter_half_widths=[bl_len],
                                                   mode='clean', filter_dims=[1], **{'tol':1e-5})
+
+    # check that 2d clean with single rectangular region is the same for outer and rect filter2d modes.
+    dpss_options1_2d = {'eigenval_cutoff': [[1e-12], [1e-12]], 'filt2d_mode': 'rect'}
+    dpss_options2_2d = {'eigenval_cutoff': [[1e-12], [1e-12]], 'filt2d_mode': 'outer'}
+    mdl1, res1, info1 = dspec.fourier_filter(x=[times, freqs], data=d, wgts=w, filter_centers=[[0.],[0.]],
+                                             filter_half_widths=[[fr_len],[bl_len]], suppression_factors=[[1e-8],[1e-8]],
+                                             mode='dayenu_dpss_leastsq', filter_dims=[1, 0], **dpss_options1_2d)
+    mdl2, res2, info2 = dspec.fourier_filter(x=[times, freqs], data=d, wgts=w, filter_centers=[[0.],[0.]],
+                                             filter_half_widths=[[fr_len],[bl_len]], suppression_factors=[[1e-8],[1e-8]],
+                                             mode='dayenu_dpss_leastsq', filter_dims=[1, 0], **dpss_options2_2d)
+    nt.assert_true(np.all(np.isclose(mdl1, mdl2)))
+    nt.assert_true(np.all(np.isclose(res1, res2)))
+
 def test_vis_clean():
     # validate that fourier_filter in various clean modes gives close values to vis_clean with equivalent parameters!
     uvd = UVData()