Implement pol_convention in pstokes module (#404)

* Added pol_convention to _combine_pol and related tests * Added method _combine_pol_arrays
HERA-Team · Jul 19, 2024 · 4151d80 · 4151d80
1 parent 8950367
commit 4151d80
Show file tree

Hide file tree

Showing 2 changed files with 209 additions and 29 deletions.
diff --git a/hera_pspec/pstokes.py b/hera_pspec/pstokes.py
@@ -5,7 +5,9 @@
 import pyuvdata
 import copy
 from collections import OrderedDict as odict
+from collections.abc import Iterable
 import argparse
+import warnings
 
 from . import version, __version__
 
@@ -86,8 +88,10 @@ def _combine_pol(uvd1, uvd2, pol1, pol2, pstokes='pI', x_orientation=None):
         Polarization of the second UVData object to use in constructing
         pStokes visibility.
 
-    pstokes: Pseudo-stokes polarization to form, type: str
-        Pseudo stokes polarization to form, can be 'pI' or 'pQ' or 'pU' or 'pV'.
+    pstokes: Pseudo-stokes polarization to form, type: str or int
+        Pseudo stokes polarization to form, can be in a number
+        or a string ('pI', 'pQ', 'pU', or 'pV') , in which case pyuvdata 
+        is used to make the conversion to integer.
         Default: pI
 
     x_orientation: str, optional
@@ -103,23 +107,113 @@ def _combine_pol(uvd1, uvd2, pol1, pol2, pstokes='pI', x_orientation=None):
     assert isinstance(uvd2, pyuvdata.UVData), \
         "uvd2 must be a pyuvdata.UVData instance"
 
+    for i, uvd in enumerate([uvd1, uvd2]):
+        pol_convention = getattr(uvd, 'pol_convention', None)
+        if pol_convention is None: 
+            warnings.warn(
+                f'No polarization convention in uvd{i+1}. '
+                'Considering it to be "avg".')
+            setattr(uvd, 'pol_convention', 'avg')
+
+    if uvd1.pol_convention != uvd2.pol_convention:
+        raise ValueError('pol_convention of uvd1 and uvd2 are different.')
+
+    # combine arrays according to polarization convention
+    combined_data, combined_flags, combined_nsamples = _combine_pol_arrays(
+        pol1, pol2, pstokes, uvd1.pol_convention,
+        [uvd1.data_array, uvd2.data_array],
+        [uvd1.flag_array, uvd2.flag_array],
+        [uvd1.nsample_array, uvd2.nsample_array])
+
+    # convert pStokes to polarization integer if a string
+    if isinstance(pstokes, str):
+        pstokes = pyuvdata.utils.polstr2num(pstokes, x_orientation=x_orientation)
+
+    # assigning and writing data, flags and metadata to UVData object
+    uvdS = copy.deepcopy(uvd1)
+    uvdS.data_array = combined_data # pseudo-stokes data
+    uvdS.flag_array = combined_flags # flag array
+    uvdS.polarization_array = np.array([pstokes], dtype=int) # polarization number
+    uvdS.nsample_array = combined_nsamples
+
+    uvdS.history = f"Merged into pseudo-stokes vis with hera_pspec version {__version__}\n"+\
+                   "-"*20+f"\ndset1 history:\n{uvd1.history}\n"+'-'*20+\
+                   f"\ndset2 history:{uvd2.history}\n"
+    return uvdS
+
+
+def _combine_pol_arrays(
+        pol1, pol2, pstokes, pol_convention='avg',
+        data_list=None, flags_list=None, nsamples_list=None,
+        x_orientation=None
+    ):
+    """
+    Combines arrays to form the corresponding pseudo-stokes results
+    as arrays.
+
+    Parameters
+    ----------
+    pol1 : Polarization, type: str
+        Polarization of the first UVData object to use in constructing
+        pStokes visibility.
+
+    pol2 : Polarization, type: str
+        Polarization of the second UVData object to use in constructing
+        pStokes visibility.
+
+    pstokes: Pseudo-stokes polarization to form, type: str or int
+        Pseudo stokes polarization to form, can be in a number
+        or a string ('pI', 'pQ', 'pU', or 'pV') in which case pyuvdata
+        is used to make the conversion to integer.
+        Default: pI.
+
+    pol_convention: str ('avg' or 'sum')
+        Convention used to form polarization. For linear polarizations ``XX``
+        and ``YY``, the stokes ``I`` sky emission can be mapped to
+        ``I = (XX + YY)/2`` (the ``avg`` convention) or ``I = XX + YY`` 
+        (the ``sum`` convention).
+
+    data_list : any length 2 iterable of numpy arrays
+        Iterable of data arrays to be combined to form their pseudo-Stokes equivalent.
+        If only one is given, it is duplicated. For the ``avg`` convention, the
+        data arrays are combined as ``(data1 + data2)/2``, in the the ``sum`` 
+        convention, the output is ``data1 + data2``.
+        Default is None.
+
+    flags_list :  any length 2 iterable of numpy arrays
+        Iterable of flag arrays to be combined to form their pseudo-Stokes equivalent.
+        If only one is given, it is duplicated.
+        Flags are combined with a logical OR.
+        Default is None.
+
+    nsamples_list :  any length 2 iterable of numpy arrays
+        Iterable of nsamples arrays to be combined to form their pseudo-Stokes equivalent.
+        nsamples are combined to preserve proper variance, see hera_pspec issue #391.
+        If only one is given, it is duplicated.
+        Default is None.
+
+    x_orientation: str, optional
+        Orientation in cardinal direction east or north of X dipole.
+        Default keeps polarization in X and Y basis.
+
+    Returns
+    -------
+    combined_data : array
+        Array of pseudo-Stokes values obtained from combining the arrays in data.
+
+    combined_flags : array
+        Array of samples obtained from combining the arrays in nsamples.
+
+    combined_nsamples : array
+        Array of pseudo-Stokes values obtained from combining the arrays in data.
+
+    """
     # convert pol1 and/or pol2 to integer if fed as a string
     if isinstance(pol1, str):
         pol1 = pyuvdata.utils.polstr2num(pol1, x_orientation=x_orientation)
     if isinstance(pol2, str):
         pol2 = pyuvdata.utils.polstr2num(pol2, x_orientation=x_orientation)
 
-    # extracting data array from the UVData objects
-    data1 = uvd1.data_array
-    data2 = uvd2.data_array
-
-    # extracting flag array from the UVdata objects
-    flag1 = uvd1.flag_array
-    flag2 = uvd2.flag_array
-
-    # constructing flags (boolean)
-    flag = np.logical_or(flag1, flag2)
-
     # convert pStokes to polarization integer if a string
     if isinstance(pstokes, str):
         pstokes = pyuvdata.utils.polstr2num(pstokes, x_orientation=x_orientation)
@@ -137,23 +231,46 @@ def _combine_pol(uvd1, uvd2, pol1, pol2, pstokes='pI', x_orientation=None):
     assert pol2 in pol_weights[pstokes], \
         "pol2 {} not used in constructing pstokes {}".format(pol2_str, pstokes_str)
 
-    # constructing Stokes visibilities
-    stdata = 0.5 * (pol_weights[pstokes][pol1]*data1 + pol_weights[pstokes][pol2]*data2)
+    # assert pol_convention makes sense
+    if pol_convention not in ['avg', 'sum']:
+        raise ValueError('pol_convention must be avg or sum.')
+
+    # checks on input lists
+    for a_list in [data_list, flags_list, nsamples_list]:
+        if a_list is None:
+            continue
+        if not isinstance(a_list, Iterable):
+            raise ValueError('Data, flags and nsamples inputs must be iterables')
+        elif len(a_list) != 2:
+            raise ValueError("Can only combine two arrays.")
+        assert np.shape(a_list[0]) == np.shape(a_list[1]), \
+            "Arrays in list must have identical shape."
 
-    # assigning and writing data, flags and metadata to UVData object
-    uvdS = copy.deepcopy(uvd1)
-    uvdS.data_array = stdata  # pseudo-stokes data
-    uvdS.flag_array = flag  # flag array
-    uvdS.polarization_array = np.array([pstokes], dtype=int) # polarization number
-    # nsamples combined to preserve proper variance, see hera_pspec issue #391
-    uvdS.nsample_array = 4 * (uvd1.nsample_array**-1 + uvd2.nsample_array**-1)**-1 
+    # constructing flags (boolean)
+    if flags_list is not None:
+        combined_flags = np.logical_or(flags_list[0], flags_list[1])
+    else:
+        combined_flags = None
+
+    # constructing Stokes visibilities
+    if data_list is not None:
+        combined_data = \
+            (pol_weights[pstokes][pol1]*data_list[0] \
+            + pol_weights[pstokes][pol2]*data_list[1])
+        if pol_convention == 'avg':
+            combined_data *= 0.5
+    else:
+        combined_data = None
 
-    uvdS.history = "Merged into pseudo-stokes vis with hera_pspec version {}\n{}" \
-                    "{}{}{}{}\n".format(__version__, "-"*20+'\n',
-                    'dset1 history:\n', uvd1.history, '\n'+'-'*20+'\ndset2 history:\n',
-                    uvd2.history)
+    # constructing nsamples
+    if nsamples_list is not None:
+        combined_nsamples = (nsamples_list[0]**-1 + nsamples_list[1]**-1)**-1
+        if pol_convention == 'avg':
+            combined_nsamples *= 4.
+    else:
+        combined_nsamples = None
 
-    return uvdS
+    return combined_data, combined_flags, combined_nsamples
 
 
 def construct_pstokes(dset1, dset2, pstokes='pI', run_check=True, antenna_nums=None,

diff --git a/hera_pspec/tests/test_pstokes.py b/hera_pspec/tests/test_pstokes.py
@@ -30,19 +30,82 @@ def runTest(self):
         pass
 
     def test_combine_pol(self):
-        uvd1 = self.uvd1
-        uvd2 = self.uvd2
+        uvd1 = copy.deepcopy(self.uvd1)
+        uvd2 = copy.deepcopy(self.uvd2)
 
         # basic execution on pol strings
         out1 = pstokes._combine_pol(uvd1, uvd2, 'XX', 'YY')
         # again w/ pol ints
         out2 = pstokes._combine_pol(uvd1, uvd2, -5, -6)
         # assert equivalence
         assert out1 == out2
+        # basic execution with different polarization conventions
+        # out1 assumed avg by default
+        setattr(uvd1, 'pol_convention', 'sum')
+        setattr(uvd2, 'pol_convention', 'sum')
+        out3 = pstokes._combine_pol(uvd1, uvd2, 'XX', 'YY')
+        assert np.allclose(out3.data_array, out1.data_array * 2.)
+        assert np.allclose(out3.nsample_array, out1.nsample_array / 4.)
 
         # check exceptions
         pytest.raises(AssertionError, pstokes._combine_pol, dset1, dset2, 'XX', 'YY' )
         pytest.raises(AssertionError, pstokes._combine_pol, uvd1, uvd2, 'XX', 1)
+        # if different polarization conventions
+        setattr(uvd1, 'pol_convention', 'avg')
+        pytest.raises(ValueError, pstokes._combine_pol, uvd1, uvd2, 'XX', 'YY')
+
+    def test_combine_pol_arrays(self):
+        uvd1 = copy.deepcopy(self.uvd1)
+        uvd2 = copy.deepcopy(self.uvd2)
+
+        # proper usage
+        d1, f1, ns1 = pstokes._combine_pol_arrays(
+            pol1=-5,
+            pol2=-6,
+            pstokes='pI',
+            pol_convention='avg',
+            data_list=[uvd1.data_array, uvd2.data_array],
+            flags_list=[uvd1.flag_array, uvd2.flag_array],
+            nsamples_list=[uvd1.nsample_array, uvd2.nsample_array]
+        )
+        # inputs can be None
+        d2, f2, ns2 = pstokes._combine_pol_arrays(
+            pol1=-5,
+            pol2=-6,
+            pstokes='pI',
+            pol_convention='avg',
+            data_list=None,
+            flags_list=None,
+            nsamples_list=None
+        )
+        assert d2 is None
+        assert f2 is None
+        assert ns2 is None
+        # polarizations can be strings     
+        d3, f3, ns3 = pstokes._combine_pol_arrays(
+            pol1='XX',
+            pol2='YY',
+            pstokes='pI',
+            pol_convention='avg',
+            data_list=[uvd1.data_array, uvd2.data_array],
+            flags_list=[uvd1.flag_array, uvd2.flag_array],
+            nsamples_list=[uvd1.nsample_array, uvd2.nsample_array]
+        )
+        assert np.allclose(d1, d3)
+        assert np.allclose(f1, f3)
+        assert np.allclose(ns1, ns3)
+
+        # check exceptions
+        pytest.raises(AssertionError, pstokes._combine_pol_arrays, 'XX', 'pI', 'pI')
+        pytest.raises(AssertionError, pstokes._combine_pol_arrays, 'pI', 'YY', 'pI')
+        pytest.raises(ValueError, pstokes._combine_pol_arrays, 'XX', 'YY', 'pI',
+            pol_convention='blah')
+        pytest.raises(ValueError, pstokes._combine_pol_arrays, 'XX', 'YY', 'pI',
+            data_list=uvd1.data_array)
+        pytest.raises(ValueError, pstokes._combine_pol_arrays, 'XX', 'YY', 'pI',
+            data_list=[uvd1.data_array, uvd1.data_array, uvd1.data_array])
+        pytest.raises(AssertionError, pstokes._combine_pol_arrays, 'XX', 'YY', 'pI',
+            data_list=[uvd1.data_array, uvd1.data_array[0]])
 
     def test_construct_pstokes(self):
         uvd1 = self.uvd1