diff --git a/docs/conf.py b/docs/conf.py
index 58398988..1bf309d5 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -26,12 +26,13 @@
 # Mock-import modules to allow build to complete without throwing errors
 import mock
 MOCK_MODULES = ['numpy', 'scipy', 'scipy.interpolate', 'scipy.integrate',
-                'pyuvdata', 'h5py', 'aipy', 'omnical', 'linsolve', 'hera_qm',
-                'uvtools', 'uvtools.dspec', 'hera_cal', 'hera_cal.utils', 'healpy',
-                'scikit-learn', 'astropy', 'astropy.cosmology', 'astropy.units',
-                'astropy.constants', 'matplotlib', 'matplotlib.pyplot',
-                'pylab', 'yaml', 'pyuvdata.utils', ]
-
+                'pyuvdata', 'h5py', 'aipy', 'aipy.const', 'omnical', 'linsolve',
+                'hera_qm', 'uvtools', 'uvtools.dspec', 'hera_cal',
+                'hera_cal.utils', 'healpy', 'scikit-learn', 'astropy',
+                'astropy.cosmology', 'astropy.units', 'astropy.constants',
+                'matplotlib', 'matplotlib.pyplot', 'pylab', 'yaml',
+                'pyuvdata.utils', 'hera_sim']
+
 for mod_name in MOCK_MODULES:
     sys.modules[mod_name] = mock.Mock()
diff --git a/hera_pspec/VERSION b/hera_pspec/VERSION
index 0ea3a944..0d91a54c 100644
--- a/hera_pspec/VERSION
+++ b/hera_pspec/VERSION
@@ -1 +1 @@
-0.2.0
+0.3.0
diff --git a/hera_pspec/grouping.py b/hera_pspec/grouping.py
index fd0489ad..e558f71c 100644
--- a/hera_pspec/grouping.py
+++ b/hera_pspec/grouping.py
@@ -825,7 +825,7 @@ def spherical_average(uvp_in, kbins, bin_widths, blpair_groups=None, time_avg=Fa
         # bin covariance: C_sph = H.T C_cyl H
         # cm shape (Npols, Ntimes, Ndlyblps, Ndlyblps)
         cm = np.moveaxis(cov_array_real[spw], -1, 0)
-        cm = Ht @ cm @ H
+        cm = Ht @ cm.clip(-1e40, 1e40) @ H  # clip infs
         cov_array_real[spw] = np.moveaxis(cm, 0, -1)
         cov_array_imag[spw] = np.zeros_like(cov_array_real[spw])
@@ -874,7 +874,7 @@ def spherical_average(uvp_in, kbins, bin_widths, blpair_groups=None, time_avg=Fa
         uvp.lst_2_array = np.unique(uvp_in.lst_2_array)
 
     # Add to history
-    uvp.history += version.history_string(notes=add_to_history)
+    uvp.history = "Spherically averaged with hera_pspec [{}]\n{}\n{}\n{}".format(version.git_hash[:15], add_to_history, '-'*40, uvp.history)
 
     # validity check
     if run_check:
diff --git a/hera_pspec/plot.py b/hera_pspec/plot.py
index 9957f9e8..8c0c69d9 100644
--- a/hera_pspec/plot.py
+++ b/hera_pspec/plot.py
@@ -301,7 +301,7 @@ def delay_waterfall(uvp, blpairs, spw, pol, component='abs-real',
                     deltasq=False, log=True, lst_in_hrs=True,
                     vmin=None, vmax=None, cmap='YlGnBu', axes=None,
                     figsize=(14, 6), force_plot=False, times=None,
-                    title_type='blpair', colorbar=True):
+                    title_type='blpair', colorbar=True, **kwargs):
     """
     Plot a 1D delay spectrum waterfall (or spectra) for a group of baselines.
@@ -374,6 +374,9 @@ def delay_waterfall(uvp, blpairs, spw, pol, component='abs-real',
     colorbar : bool, optional
         Whether to make a colorbar. Default: True
 
+    kwargs : keyword arguments
+        Additional kwargs to pass to ax.matshow()
+
     Returns
     -------
     fig : matplotlib.pyplot.Figure
@@ -558,7 +561,7 @@ def delay_waterfall(uvp, blpairs, spw, pol, component='abs-real',
         # plot waterfall
         cax = ax.matshow(waterfall[key], cmap=cmap, aspect='auto',
                          vmin=vmin, vmax=vmax,
-                         extent=[x[0], x[-1], y[-1], y[0]])
+                         extent=[x[0], x[-1], y[-1], y[0]], **kwargs)
 
         # ax config
         ax.xaxis.set_ticks_position('bottom')
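Note: with the **kwargs passthrough above, any extra ax.matshow() option can now be forwarded
through plot.delay_waterfall(). A minimal sketch (hypothetical usage; `uvp` is assumed to be a
UVPSpec with data loaded and `blps` a list of baseline-pair tuples):

    import matplotlib.colors as colors
    from hera_pspec import plot

    # forward a log color stretch to ax.matshow() via the new **kwargs
    fig = plot.delay_waterfall(uvp, blps, 0, 'xx', component='abs-real',
                               log=False, norm=colors.LogNorm())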
diff --git a/hera_pspec/pspecdata.py b/hera_pspec/pspecdata.py
index 6aca712c..bf0a9332 100644
--- a/hera_pspec/pspecdata.py
+++ b/hera_pspec/pspecdata.py
@@ -892,10 +892,11 @@ def set_R(self, d):
         d : dict
             Dictionary containing data to insert into data-weighting R matrix
             cache. Keys are tuples with the following form
-            key = (dset_index, bl_ant_pair_pol_tuple, data_weighting, taper)
-            Ex: (0, (37, 38, 'xx'), 'bh')
-            If data_weight == 'dayenu' then additional elements are appended
-            key + (filter_extension, spw_Nfreqs, symmetric_taper)
+            `key = (dset_index, bl_ant_pair_pol_tuple, data_weighting, taper)`
+            Example: `(0, (37, 38, 'xx'), 'bh')`
+
+            If data_weight == 'dayenu' then additional elements are appended:
+            `key + (filter_extension, spw_Nfreqs, symmetric_taper)`
         """
         for k in d:
             self._R[k] = d[k]
@@ -1066,22 +1067,30 @@ def set_r_param(self, key, r_params):
 
         Parameters
         ----------
-        key: tuple (dset, bl, [pol]), where dset is the index of the dataset
-            bl is a 2-tuple
-            pol is a float or string specifying polarization
-
-        r_params: dictionary with parameters for weighting matrix.
-            Proper fields
-            and formats depend on the mode of data_weighting.
-            data_weighting == 'dayenu':
-                dictionary with fields
-                'filter_centers', list of floats (or float) specifying the (delay) channel numbers
-                at which to center filtering windows. Can specify fractional channel number.
-                'filter_half_widths', list of floats (or float) specifying the width of each
-                filter window in (delay) channel numbers. Can specify fractional channel number.
-                'filter_factors', list of floats (or float) specifying how much power within each filter window
-                is to be suppressed.
-            Absence of r_params dictionary will result in an error!
+        key: tuple
+            Key in the format: (dset, bl, [pol])
+            `dset` is the index of the dataset, `bl` is a 2-tuple, `pol` is a
+            float or string specifying polarization.
+
+        r_params: dict
+            Dict containing parameters for weighting matrix. Proper fields and
+            formats depend on the mode of data_weighting.
+
+            For `data_weighting` set to `dayenu`, this is a dictionary with the
+            following fields:
+
+            `filter_centers`, list of floats (or float) specifying the (delay)
+            channel numbers at which to center filtering windows. Can specify
+            fractional channel number.
+
+            `filter_half_widths`, list of floats (or float) specifying the
+            width of each filter window in (delay) channel numbers. Can specify
+            fractional channel number.
+
+            `filter_factors`, list of floats (or float) specifying how much
+            power within each filter window is to be suppressed.
+
+            Absence of an `r_params` dictionary will result in an error.
         """
         key = self.parse_blkey(key)
         key = (self.data_weighting,) + key
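Note: a sketch of how the documented dayenu fields fit together (hypothetical values; `ds` is
assumed to be a PSpecData with data_weighting set to 'dayenu'):

    # register r_params for baseline (37, 38), pol 'xx', in dataset 0
    r_params = {'filter_centers': [0.],        # (delay) channel numbers, may be fractional
                'filter_half_widths': [15.5],  # window half-widths in channels
                'filter_factors': [1e-9]}      # power suppression per window
    ds.set_r_param((0, (37, 38), 'xx'), r_params)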
@@ -1594,8 +1603,9 @@ def get_unnormed_E(self, key1, key2, exact_norm=False, pol=False):
 
         return 0.5 * E_matrices
 
-    def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False, pol=False,
-                       time_index=None):
+
+    def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False,
+                       pol=False, time_index=None):
         """
         Calculates the covariance matrix for unnormed bandpowers (i.e., the q
         vectors). If the data were real and x_1 = x_2, the expression would be
@@ -1608,8 +1618,8 @@ def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False, pol=Fa
 
         .. math ::
             E^{12,a} = (1/2) R_1 Q^a R_2
-            C^1 = <x1 x1^\dagger> - <x1><x1^\dagger>
-            C^2 = <x2 x2^\dagger> - <x2><x2^\dagger>
+            C^1 = <x1 x1^\dagger> - <x1><x1^\dagger>
+            C^2 = <x2 x2^\dagger> - <x2><x2^\dagger>
             P^{12} = <x1 x2> - <x1><x2>
             S^{12} = <x1^* x2^*> - <x1^*><x2^*>
@@ -1660,19 +1670,24 @@ def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False, pol=Fa
             Used only if exact_norm is True.
 
         model : string, optional
-            Type of covariance model to calculate, if not cached. Options=['empirical', 'dsets', 'autos']
+            Type of covariance model to calculate, if not cached.
+            Options=['empirical', 'dsets', 'autos']
             How the covariances of the input data should be estimated.
+
             In 'dsets' mode, error bars are estimated from user-provided
-            per baseline and per channel standard deivations.
-            If 'empirical' is set, then error bars are estimated from the data by averaging the
-            channel-channel covariance of each baseline over time and
-            then applying the appropriate linear transformations to these
-            frequency-domain covariances.
-            If 'autos' is set, the covariances of the input data
-            over a baseline is estimated from the autocorrelations of the two antennas over channel bandwidth
-            and integration time.
-
-        time_index : integer, compute covariance at specific time-step
+            per baseline and per channel standard deviations.
+
+            If 'empirical' is set, then error bars are estimated from the data
+            by averaging the channel-channel covariance of each baseline over
+            time and then applying the appropriate linear transformations to
+            these frequency-domain covariances.
+
+            If 'autos' is set, the covariances of the input data over a
+            baseline are estimated from the autocorrelations of the two
+            antennas over channel bandwidth and integration time.
+
+        time_index : int, optional
+            Compute covariance at specific time-step. Default: None.
 
         Returns
         -------
         V : array_like, complex
             Covariance matrix for the unnormed bandpowers
         """
@@ -1683,8 +1698,10 @@ def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False, pol=Fa
         E_matrices = self.get_unnormed_E(key1, key2, exact_norm=exact_norm, pol=pol)
         C1 = self.C_model(key1, model=model, time_index=time_index)
         C2 = self.C_model(key2, model=model, time_index=time_index)
-        P21 = self.cross_covar_model(key2, key1, model=model, conj_1=False, conj_2=False, time_index=time_index)
-        S21 = self.cross_covar_model(key2, key1, model=model, conj_1=True, conj_2=True, time_index=time_index)
+        P21 = self.cross_covar_model(key2, key1, model=model, conj_1=False,
+                                     conj_2=False, time_index=time_index)
+        S21 = self.cross_covar_model(key2, key1, model=model, conj_1=True,
+                                     conj_2=True, time_index=time_index)
 
         E21C1 = np.dot(np.transpose(E_matrices.conj(), (0,2,1)), C1)
         E12C2 = np.dot(E_matrices, C2)
@@ -1695,62 +1712,78 @@ def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False, pol=Fa
 
         return auto_term + cross_term
 
-    def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False, pol=False, model='empirical', known_cov=None):
+    def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False,
+                                pol=False, model='empirical', known_cov=None):
         """
         Calculates the auto-covariance matrix for both the real and imaginary
         parts of bandpowers (i.e., the q vectors and the p vectors).
 
-        Define
-
-        .. math ::
+        Define:
+
+            Real part of q_a = (1/2) (q_a + q_a^\dagger)
-            Imaginary part of q_a = (1/2i) (q_a - q_a^dagger)
-            Real part of p_a = (1/2) (p_a + p_a^dagger)
-            Imaginary part of p_a = (1/2i) (p_a - p_a^dagger)
-
+            Imaginary part of q_a = (1/2i) (q_a - q_a^\dagger)
+            Real part of p_a = (1/2) (p_a + p_a^\dagger)
+            Imaginary part of p_a = (1/2i) (p_a - p_a^\dagger)
+
+        .. math ::
+
             E^{12,a} = (1/2) R_1 Q^a R_2
-            C^{12} = <x1 x2^\dagger> - <x1><x2^\dagger>
+            C^{12} = <x1 x2^\dagger> - <x1><x2^\dagger>
             P^{12} = <x1 x2> - <x1><x2>
             S^{12} = <x1^* x2^*> - <x1^*><x2^*>
             p_a = M_{ab} q_b
 
-        Then
+        Then:
+
+        The variance of (1/2) (q_a + q_a^\dagger):
+
+        .. math ::
+
+            (1/4){ (<q_a q_b> - <q_a><q_b>) + 2(<q_a q_b^\dagger> - <q_a><q_b^\dagger>)
+            + (<q_a^\dagger q_b^\dagger> - <q_a^\dagger><q_b^\dagger>) }
+
+        The variance of (1/2i) (q_a - q_a^\dagger):
+
+        .. math ::
+
-        The variance of (1/2) (q_a + q_a^dagger):
-            (1/4){ (<q_a q_b> - <q_a><q_b>) + 2(<q_a q_b^dagger> - <q_a><q_b^dagger>)
-            + (<q_a^dagger q_b^dagger> - <q_a^dagger><q_b^dagger>) }
-
-        The variance of (1/2i) (q_a - q_a^dagger) :
-            (-1/4){ (<q_a q_b> - <q_a><q_b>) - 2(<q_a q_b^dagger> - <q_a><q_b^dagger>)
-            + (<q_a^dagger q_b^dagger> - <q_a^dagger><q_b^dagger>) }
-
-        The variance of (1/2) (p_a + p_a^dagger):
-            (1/4) { M_{ab} M_{ac} (<q_b q_c> - <q_b><q_c>) +
-            M_{ab} M_{ac}^* (<q_b q_c^dagger> - <q_b><q_c^dagger>) +
-            M_{ab}^* M_{ac} (<q_b^dagger q_c> - <q_b^dagger><q_c>) +
-            M_{ab}^* M_{ac}^* (<q_b^dagger q_c^dagger> - <q_b^dagger><q_c^dagger>) }
-
-        The variance of (1/2i) (p_a - p_a^dagger):
-            (-1/4) { M_{ab} M_{ac} (<q_b q_c> - <q_b><q_c>) -
-            M_{ab} M_{ac}^* (<q_b q_c^dagger> - <q_b><q_c^dagger>) -
-            M_{ab}^* M_{ac} (<q_b^dagger q_c> - <q_b^dagger><q_c>) +
-            M_{ab}^* M_{ac}^* (<q_b^dagger q_c^dagger> - <q_b^dagger><q_c^dagger>) }
+
+            (-1/4){ (<q_a q_b> - <q_a><q_b>) - 2(<q_a q_b^\dagger> - <q_a><q_b^\dagger>)
+            + (<q_a^\dagger q_b^\dagger> - <q_a^\dagger><q_b^\dagger>) }
+
+        The variance of (1/2) (p_a + p_a^\dagger):
+
+        .. math ::
+
+            (1/4) { M_{ab} M_{ac} (<q_b q_c> - <q_b><q_c>) +
+            M_{ab} M_{ac}^* (<q_b q_c^\dagger> - <q_b><q_c^\dagger>) +
+            M_{ab}^* M_{ac} (<q_b^\dagger q_c> - <q_b^\dagger><q_c>) +
+            M_{ab}^* M_{ac}^* (<q_b^\dagger q_c^\dagger> - <q_b^\dagger><q_c^\dagger>) }
+
+        The variance of (1/2i) (p_a - p_a^\dagger):
+
+        .. math ::
+
+            (-1/4) { M_{ab} M_{ac} (<q_b q_c> - <q_b><q_c>) -
+            M_{ab} M_{ac}^* (<q_b q_c^\dagger> - <q_b><q_c^\dagger>) -
+            M_{ab}^* M_{ac} (<q_b^\dagger q_c> - <q_b^\dagger><q_c>) +
+            M_{ab}^* M_{ac}^* (<q_b^\dagger q_c^\dagger> - <q_b^\dagger><q_c^\dagger>) }
 
         where
-            <q_a q_b> - <q_a><q_b> =
-                tr(E^{12,a} C^{21} E^{12,b} C^{21})
-                + tr(E^{12,a} P^{22} E^{21,b*} S^{11})
-            <q_a q_b^dagger> - <q_a><q_b^dagger> =
-                tr(E^{12,a} C^{22} E^{21,b} C^{11})
-                + tr(E^{12,a} P^{21} E^{12,b *} S^{21})
-            <q_a^dagger q_b^dagger> - <q_a^dagger><q_b^dagger> =
-                tr(E^{21,a} C^{12} E^{21,b} C^{12})
-                + tr(E^{21,a} P^{11} E^{12,b *} S^{22})
+
+        .. math ::
+
+            <q_a q_b> - <q_a><q_b> =
+                tr(E^{12,a} C^{21} E^{12,b} C^{21})
+                + tr(E^{12,a} P^{22} E^{21,b*} S^{11})
+            <q_a q_b^\dagger> - <q_a><q_b^\dagger> =
+                tr(E^{12,a} C^{22} E^{21,b} C^{11})
+                + tr(E^{12,a} P^{21} E^{12,b *} S^{21})
+            <q_a^\dagger q_b^\dagger> - <q_a^\dagger><q_b^\dagger> =
+                tr(E^{21,a} C^{12} E^{21,b} C^{12})
+                + tr(E^{21,a} P^{11} E^{12,b *} S^{22})
 
         Note that
 
         .. math ::
+
             E^{12,a}_{ij}.conj = E^{21,a}_{ji}
 
         This function estimates C^1, C^2, P^{12}, and S^{12} empirically by
@@ -1786,34 +1819,47 @@ def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False, pol=Fals
             Used only if exact_norm is True.
 
         model : string, optional
-            Type of covariance model to use. if not cached. Options=['empirical', 'dsets', 'autos', 'foreground_dependent',
-            (other model names in known_cov)]
-            In 'dsets' mode, error bars are estimated from user-provided
-            per baseline and per channel standard deivations. In 'empirical' mode,
-            error bars are estimated from the data by averaging the
-            channel-channel covariance of each baseline over time and
-            then applying the appropriate linear transformations to these
-            frequency-domain covariances. In 'autos' mode, the covariances of the input data
-            over a baseline is estimated from the autocorrelations of the two antennas forming the baseline
-            across channel bandwidth and integration time.
-            In 'foreground_dependent' mode, it involves using auto-correlation amplitudes to model the input noise covariance
-            and visibility outer products to model the input systematics covariance.
-
-            ############
-            When model is chosen as "autos" or "dsets", only C^{11} and C^{22} are accepted as non-zero values,
-            and the two matrices are also expected to be diagonal,
-            thus only <q_a q_b^dagger> - <q_a><q_b^dagger> = tr[ E^{12,a} C^{22} E^{21,b} C^{11} ] exists
-            in the covariance terms of q vectors.
-            When model is chosen as 'foreground_dependent', we further include the signal-noise coupling term
-            besides the noise in the output covariance. Still only <q_a q_b^dagger> - <q_a><q_b^dagger> is non-zero,
-            while it takes a form of tr[ E^{12,a} Cn^{22} E^{21,b} Cn^{11} +
-            E^{12,a} Cs^{22} E^{21,b} Cn^{11} +
+            Type of covariance model to use, if not cached.
+            Options=['empirical', 'dsets', 'autos', 'foreground_dependent',
+            (other model names in known_cov)].
+
+            In `dsets` mode, error bars are estimated from user-provided
+            per baseline and per channel standard deviations.
+
+            In `empirical` mode, error bars are estimated from the data by
+            averaging the channel-channel covariance of each baseline over time
+            and then applying the appropriate linear transformations to these
+            frequency-domain covariances.
+
+            In `autos` mode, the covariances of the input data over a baseline
+            are estimated from the autocorrelations of the two antennas forming
+            the baseline across channel bandwidth and integration time.
+
+            In `foreground_dependent` mode, auto-correlation amplitudes are
+            used to model the input noise covariance and visibility outer
+            products to model the input systematics covariance.
+
+            When model is chosen as `autos` or `dsets`, only C^{11} and C^{22}
+            are accepted as non-zero values, and the two matrices are also
+            expected to be diagonal, thus only
+            <q_a q_b^\dagger> - <q_a><q_b^\dagger> = tr[ E^{12,a} C^{22} E^{21,b} C^{11} ]
+            exists in the covariance terms of q vectors.
+
+            When model is chosen as `foreground_dependent`, we further include
+            the signal-noise coupling term besides the noise in the output
+            covariance. Still only <q_a q_b^\dagger> - <q_a><q_b^\dagger> is
+            non-zero, while it takes the form
+            tr[ E^{12,a} Cn^{22} E^{21,b} Cn^{11} +
+                E^{12,a} Cs^{22} E^{21,b} Cn^{11} +
+                E^{12,a} Cn^{22} E^{21,b} Cs^{11} ],
+            where Cn is just Cautos, the input noise covariance estimated by
+            the auto-correlation amplitudes (by calling C_model(model='autos')),
+            and Cs uses the outer product of input visibilities to model the
+            covariance on systematics.
+
+            To construct a symmetric and unbiased covariance matrix, we choose
-            Cs^{11}_{ij} = Cs^{22}_{ij} = 1/2 * [ x1_i x2_j^{*} + x2_i x1_j^{*} ], which preserves the property Cs_{ij}^* = Cs_{ji}.
-            #############
+            Cs^{11}_{ij} = Cs^{22}_{ij} = 1/2 * [ x1_i x2_j^{*} + x2_i x1_j^{*} ],
+            which preserves the property Cs_{ij}^* = Cs_{ji}.
 
         known_cov : dicts of covariance matrices
             Covariance matrices that are not calculated internally from data.
@@ -1849,7 +1895,7 @@ def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False, pol=Fals
         cov_q_real, cov_q_imag, cov_p_real, cov_p_imag = [], [], [], []
         for time_index in range(self.dsets[0].Ntimes):
             if model in ['dsets','autos']:
-                # calculate <q_a q_b^dagger> - <q_a><q_b^dagger> = tr[ E^{12,a} C^{22} E^{21,b} C^{11} ]
+                # calculate <q_a q_b^\dagger> - <q_a><q_b^\dagger> = tr[ E^{12,a} C^{22} E^{21,b} C^{11} ]
                 # We have used tr[A D_1 B D_2] = \sum_{ijkm} A_{ij} d_{1j} \delta_{jk} B_{km} d_{2m} \delta_{mi} = \sum_{ik} [A_{ik}*d_{1k}] * [B_{ki}*d_{2i}]
                 # to simplify the computation.
                 C11 = self.C_model(key1, model=model, known_cov=known_cov, time_index=time_index)
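Note: the loop above leans on two identities that can be sanity-checked in isolation — the
diagonal-matrix trace shortcut quoted in the comment, and the fact that the 'ab,cd,bd->ac'
einsum used below is just the covariance transform M C M^T. A standalone numpy sketch (not
part of the PR):

    import numpy as np

    rng = np.random.default_rng(0)
    n = 8
    A, B = rng.normal(size=(n, n)), rng.normal(size=(n, n))
    d1, d2 = rng.normal(size=n), rng.normal(size=n)

    # tr[A D_1 B D_2] = \sum_{ik} [A_{ik} d_{1k}] [B_{ki} d_{2i}] for diagonal D_1, D_2
    full = np.trace(A @ np.diag(d1) @ B @ np.diag(d2))
    fast = np.sum((A * d1) * (B * d2).T)
    assert np.isclose(full, fast)

    # np.einsum('ab,cd,bd->ac', m, m, q) equals m @ q @ m.T
    m = rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n))
    q = rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n))
    assert np.allclose(np.einsum('ab,cd,bd->ac', m, m, q), m @ q @ m.T)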
@@ -1946,8 +1992,8 @@ def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False, pol=Fals
             else:
                 assert np.shape(q_q) == np.shape(m), "covariance matrix and normalization matrix has different shapes."
                 MMq_q = np.einsum('ab,cd,bd->ac', m, m, q_q, optimize=einstein_path_2)
-            # calculate \sum_{bd} [ M_{ab} M_{cd}^* (<q_b q_d^dagger> - <q_b><q_d^dagger>) ]
-            # and \sum_{bd} [ M_{ab}^* M_{cd} (<q_b^dagger q_d> - <q_b^dagger><q_d>) ]
+            # calculate \sum_{bd} [ M_{ab} M_{cd}^* (<q_b q_d^\dagger> - <q_b><q_d^\dagger>) ]
+            # and \sum_{bd} [ M_{ab}^* M_{cd} (<q_b^\dagger q_d> - <q_b^\dagger><q_d>) ]
             if np.isclose([q_qdagger], 0).all():
                 MM_q_qdagger = 0.+1.j*0
                 M_Mq_qdagger_ = 0.+1.j*0
             else:
                 assert np.shape(q_qdagger) == np.shape(m), "covariance matrix and normalization matrix has different shapes."
                 MM_q_qdagger = np.einsum('ab,cd,bd->ac', m, m.conj(), q_qdagger, optimize=einstein_path_2)
                 M_Mq_qdagger_ = np.einsum('ab,cd,bd->ac', m.conj(), m, q_qdagger.conj(), optimize=einstein_path_2)
-            # calculate \sum_{bd} [ M_{ab}^* M_{cd}^* (<q_b^dagger q_d^dagger> - <q_b^dagger><q_d^dagger>) ]
+            # calculate \sum_{bd} [ M_{ab}^* M_{cd}^* (<q_b^\dagger q_d^\dagger> - <q_b^\dagger><q_d^\dagger>) ]
             if np.isclose([qdagger_qdagger], 0).all():
                 M_M_qdagger_qdagger = 0.+1.j*0
             else:
@@ -1989,6 +2035,7 @@ def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False, pol=Fals
 
         return cov_q_real, cov_q_imag, cov_p_real, cov_p_imag
 
+
     def get_MW(self, G, H, mode='I', band_covar=None, exact_norm=False, rcond=1e-15):
         """
         Construct the normalization matrix M and window function matrix W for
@@ -2676,7 +2723,8 @@ def validate_pol(self, dsets, pol_pair):
 
     def pspec(self, bls1, bls2, dsets, pols, n_dlys=None,
               input_data_weight='identity', norm='I', taper='none',
               sampling=False, little_h=True, spw_ranges=None, symmetric_taper=True,
-              baseline_tol=1.0, store_cov=False, store_cov_diag=False, return_q=False, store_window=True, verbose=True,
+              baseline_tol=1.0, store_cov=False, store_cov_diag=False,
+              return_q=False, store_window=True, verbose=True,
               filter_extensions=None, exact_norm=False, history='', r_params=None,
               cov_model='empirical', known_cov=None):
         """
@@ -2771,11 +2819,13 @@ def pspec(self, bls1, bls2, dsets, pols, n_dlys=None,
             in the UVPSpec object.
 
         store_cov_diag : bool, optional
-            If True, store the square root of the diagonal of the output covariance matrix
-            calculated by using get_analytic_covariance(). The error bars will
-            be stored in the form of: sqrt(diag(cov_array_real)) + 1.j*sqrt(diag(cov_array_imag)).
-            It's a way to save the disk space since the whole cov_array data with a size of Ndlys x Ndlys x Ntimes x Nblpairs x Nspws
-            is too large.
+            If True, store the square root of the diagonal of the output
+            covariance matrix calculated by using get_analytic_covariance().
+            The error bars will be stored in the form of:
+            `sqrt(diag(cov_array_real)) + 1.j*sqrt(diag(cov_array_imag))`.
+            This saves disk space, since the full cov_array, with a size of
+            Ndlys x Ndlys x Ntimes x Nblpairs x Nspws, is too large.
 
         return_q : bool, optional
             If True, return the results (delay spectra and covariance
@@ -2786,30 +2836,42 @@ def pspec(self, bls1, bls2, dsets, pols, n_dlys=None,
             Default: True
 
         cov_model : string, optional
-            Type of covariance model to calculate, if not cached. Options=['empirical', 'dsets', 'autos', 'foreground_dependent',
+            Type of covariance model to calculate, if not cached.
+
+            Options=['empirical', 'dsets', 'autos', 'foreground_dependent',
             (other model names in known_cov)]
-            In 'dsets' mode, error bars are estimated from user-provided per baseline and per channel standard deivations.
-            In 'empirical' mode, error bars are estimated from the data by averaging the
-            channel-channel covariance of each baseline over time and
-            then applying the appropriate linear transformations to these
-            frequency-domain covariances.
-            In 'autos' mode, the covariances of the input data
-            over a baseline is estimated from the autocorrelations of the two antennas forming the baseline
-            across channel bandwidth and integration time.
-            In 'foreground_dependent' mode, it involves using auto-correlation amplitudes to model the input noise covariance
-            and visibility outer products to model the input systematics covariance.
+
+            In 'dsets' mode, error bars are estimated from user-provided per
+            baseline and per channel standard deviations.
+
+            In 'empirical' mode, error bars are estimated from the data by
+            averaging the channel-channel covariance of each baseline over
+            time and then applying the appropriate linear transformations to
+            these frequency-domain covariances.
+
+            In 'autos' mode, the covariances of the input data over a baseline
+            are estimated from the autocorrelations of the two antennas forming
+            the baseline across channel bandwidth and integration time.
+
+            In 'foreground_dependent' mode, auto-correlation amplitudes are
+            used to model the input noise covariance and visibility outer
+            products to model the input systematics covariance.
+
             For more details see ds.get_analytic_covariance().
 
         known_cov : dicts of input covariance matrices
-            known_cov has a type {Ckey:covariance}, which is the same with
-            ds._C. The matrices stored in known_cov are constructed
-            outside the PSpecData object, different from those in ds._C which are constructed
+            `known_cov` has a type {Ckey:covariance}, which is the same as
+            ds._C. The matrices stored in known_cov are constructed outside
+            the PSpecData object, unlike those in ds._C which are constructed
             internally.
-            The Ckey should conform to
-            (dset_pair_index, blpair_int, model, time_index, conj_1, conj_2),
-            e.g. ((0, 1), ((25,37,"xx"), (25, 37, "xx")), 'empirical', False, True),
+
+            The Ckey should conform to:
+            `(dset_pair_index, blpair_int, model, time_index, conj_1, conj_2)`,
+            e.g.
+            `((0, 1), ((25,37,"xx"), (25, 37, "xx")), 'empirical', False, True)`,
             while covariance are ndarrays with shape (Nfreqs, Nfreqs).
-            Also see PSpecData.set_C() for more details.
+
+            Also see PSpecData.set_C() for more details.
 
         verbose : bool, optional
             If True, print progress, warnings and debugging info to stdout.
@@ -2824,20 +2886,26 @@ def pspec(self, bls1, bls2, dsets, pols, n_dlys=None,
             False runs two times faster than setting it to True.
 
         history : str, optional
-            history string to attach to UVPSpec object
+            History string to attach to UVPSpec object.
 
         r_params: dictionary with parameters for weighting matrix.
-            Proper fields
-            and formats depend on the mode of data_weighting.
-            data_weighting == 'dayenu':
-                dictionary with fields
-                'filter_centers', list of floats (or float) specifying the (delay) channel numbers
-                at which to center filtering windows. Can specify fractional channel number.
-                'filter_half_widths', list of floats (or float) specifying the width of each
-                filter window in (delay) channel numbers. Can specify fractional channel number.
-                'filter_factors', list of floats (or float) specifying how much power within each filter window
-                is to be suppressed.
-            Absence of r_params dictionary will result in an error!
+            Proper fields and formats depend on the mode of data_weighting.
+
+            For `data_weighting` set to 'dayenu', `r_params` should be a dict
+            with the following fields:
+
+            `filter_centers`, a list of floats (or float) specifying the
+            (delay) channel numbers at which to center filtering windows. Can
+            specify fractional channel number.
+
+            `filter_half_widths`, a list of floats (or float) specifying the
+            width of each filter window in (delay) channel numbers. Can specify
+            fractional channel number.
+
+            `filter_factors`, list of floats (or float) specifying how much
+            power within each filter window is to be suppressed.
+
+            Absence of an `r_params` dictionary will result in an error.
 
         Returns
         -------
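Note: a sketch of a pspec() call exercising the new error-bar options (hypothetical setup;
`ds`, `bls1`, and `bls2` are placeholders, and the argument values are illustrative only):

    # cross-multiply dsets 0 and 1, storing only diagonal errorbars
    uvp = ds.pspec(bls1, bls2, (0, 1), [('xx', 'xx')],
                   spw_ranges=[(300, 400)], taper='bh',
                   cov_model='foreground_dependent',  # noise + signal-noise coupling
                   store_cov_diag=True,               # sqrt(diag(cov)) only, saves disk
                   verbose=False)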
diff --git a/hera_pspec/tests/test_grouping.py b/hera_pspec/tests/test_grouping.py
index 63b63d1e..44837860 100644
--- a/hera_pspec/tests/test_grouping.py
+++ b/hera_pspec/tests/test_grouping.py
@@ -469,6 +469,13 @@ def test_spherical_average():
     # bug check: in the past, combine after spherical average erroneously changed dly_array
     assert sph == sph_c
 
+    # insert an inf into the arrays as a test
+    uvp2 = copy.deepcopy(uvp)
+    uvp2.cov_array_real[0][0], uvp2.cov_array_imag[0][0] = np.inf, np.inf
+    uvp2.stats_array['err'][0][0] = np.inf
+    sph = grouping.spherical_average(uvp2, kbins, bin_widths)
+    assert np.isfinite(sph.cov_array_real[0]).all()
+
     # exceptions
     nt.assert_raises(AssertionError, grouping.spherical_average, uvp, kbins, 1.0)
diff --git a/hera_pspec/tests/test_noise.py b/hera_pspec/tests/test_noise.py
index 46d73970..517bba9d 100644
--- a/hera_pspec/tests/test_noise.py
+++ b/hera_pspec/tests/test_noise.py
@@ -159,22 +159,27 @@ def test_analytic_noise():
     # get P_N estimate
     auto_Tsys = utils.uvd_to_Tsys(uvd, beam, os.path.join(DATA_PATH, "test_uvd.uvh5"))
     assert os.path.exists(os.path.join(DATA_PATH, "test_uvd.uvh5"))
-    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N','P_SN'])
+    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N','P_SN'], P_SN_correction=False)
 
     # check consistency of 1-sigma standard dev. to 1%
     cov_diag = uvp.cov_array_real[0][:, range(Nchan), range(Nchan)]
     stats_diag = uvp.stats_array['P_N'][0]
     frac_ratio = (cov_diag**0.5 - stats_diag) / stats_diag
-
     assert np.abs(frac_ratio).mean() < 0.01
 
     ## check P_SN consistency of 1-sigma standard dev. to 1%
     cov_diag = uvp_fg.cov_array_real[0][:, range(Nchan), range(Nchan)]
     stats_diag = uvp.stats_array['P_SN'][0]
     frac_ratio = (cov_diag**0.5 - stats_diag) / stats_diag
-
     assert np.abs(frac_ratio).mean() < 0.01
 
+    # now compute unbiased P_SN and check that it matches P_N at high-k
+    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N','P_SN'], P_SN_correction=True)
+    frac_ratio = (uvp.stats_array["P_SN"][0] - uvp.stats_array["P_N"][0]) / uvp.stats_array["P_N"][0]
+    dlys = uvp.get_dlys(0) * 1e9
+    select = np.abs(dlys) > 3000
+    assert np.abs(frac_ratio[:, select].mean()) < 1 / np.sqrt(uvp.Nblpairts)
+
     # clean up
     os.remove(os.path.join(DATA_PATH, "test_uvd.uvh5"))
diff --git a/hera_pspec/utils.py b/hera_pspec/utils.py
index 44017975..4c19800f 100644
--- a/hera_pspec/utils.py
+++ b/hera_pspec/utils.py
@@ -1301,7 +1301,7 @@ def uvd_to_Tsys(uvd, beam, Tsys_outfile=None):
     return uvd
 
-def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
+def uvp_noise_error(uvp, auto_Tsys=None, err_type='P_N', precomp_P_N=None, P_SN_correction=True):
     """
     Calculate analytic thermal noise error for a UVPSpec object.
     Adds to uvp.stats_array inplace.
@@ -1313,9 +1313,10 @@ def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
         If err_type == 'P_SN', uvp should not have any incoherent averaging
         applied.
 
-    auto_Tsys : UVData object
+    auto_Tsys : UVData object, optional
         Holds autocorrelation Tsys estimates in Kelvin (see uvd_to_Tsys)
         for all antennas and polarizations involved in uvp power spectra.
+        Needed for P_N computation, not needed if feeding precomp_P_N.
 
     err_type : str or list of str, options = ['P_N', 'P_SN']
         Type of thermal noise error to compute. P_N is the standard
@@ -1326,9 +1327,13 @@ def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
         which uses uses Re[P(tau)] as a proxy for P_S.
        To store both, feed as err_type = ['P_N', 'P_SN']
 
-    precomp_P_N : str
+    precomp_P_N : str, optional
         If computing P_SN and P_N is already computed, use this key to index
         stats_array for P_N rather than computing it from auto_Tsys.
+
+    P_SN_correction : bool, optional
+        Apply correction factor if computing P_SN to account for double
+        counting of noise.
     """
     from hera_pspec import uvpspec_utils
 
     if isinstance(err_type, str):
         err_type = [err_type]
 
-    # get metadata
-    lsts = np.unique(auto_Tsys.lst_array)
-    freqs = auto_Tsys.freq_array[0]
+    # get metadata if needed
+    if precomp_P_N is None:
+        lsts = np.unique(auto_Tsys.lst_array)
+        freqs = auto_Tsys.freq_array[0]
 
     # iterate over spectral window
     for spw in uvp.spw_array:
@@ -1393,10 +1399,47 @@ def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
             if 'P_SN' in err_type:
                 # calculate P_SN: see Tan+2020 and
                 # H1C_IDR2/notebooks/validation/errorbars_with_systematics_and_noise.ipynb
+                # get signal proxy
                 P_S = uvp.get_data(key).real
+                # clip negative values
                 P_S[P_S < 0] = 0
                 P_SN = np.sqrt(np.sqrt(2) * P_S * P_N + P_N**2)
                 # catch nans, set to inf
                 P_SN[np.isnan(P_SN)] = np.inf
                 # set stats
                 uvp.set_stats('P_SN', key, P_SN)
+
+    # P_SN correction
+    if P_SN_correction and "P_SN" in err_type:
+        if precomp_P_N is None:
+            precomp_P_N = 'P_N'
+        apply_P_SN_correction(uvp, P_SN='P_SN', P_N=precomp_P_N)
+
+
+def apply_P_SN_correction(uvp, P_SN='P_SN', P_N='P_N'):
+    """
+    Apply correction factor to P_SN errorbar in stats_array to account
+    for double counting of noise by using data as proxy for signal.
+    See Jianrong Tan et al. 2021 (Errorbar methodologies).
+    Operates in place. Must have both P_SN and P_N in stats_array.
+
+    Args:
+        uvp : UVPSpec object
+            With P_SN and P_N errorbar (not variance) as a key in stats_array
+        P_SN : str
+            Key in stats_array for P_SN errorbar
+        P_N : str
+            Key in stats_array for P_N errorbar
+    """
+    assert P_SN in uvp.stats_array
+    assert P_N in uvp.stats_array
+    for spw in uvp.spw_array:
+        # get P_SN and P_N
+        p_n = uvp.stats_array[P_N][spw]
+        p_sn = uvp.stats_array[P_SN][spw]
+        # derive correction
+        corr = 1 - (np.sqrt(1 / np.sqrt(np.pi) + 1) - 1) * p_n.real / p_sn.real.clip(1e-40, np.inf)
+        corr[np.isclose(corr, 0)] = np.inf
+        corr[corr < 0] = np.inf
+        # apply correction
+        uvp.stats_array[P_SN][spw] *= corr
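Note: the intended workflow for the corrected errorbars, sketched under the assumption that
`uvd`, `beam`, and `uvp` already exist (see uvd_to_Tsys above):

    # Tsys from autocorrelations, then P_N and bias-corrected P_SN
    auto_Tsys = utils.uvd_to_Tsys(uvd, beam)
    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N', 'P_SN'],
                          P_SN_correction=True)  # applies apply_P_SN_correction()
    p_sn = uvp.stats_array['P_SN'][0]            # corrected errorbar, spw 0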
diff --git a/hera_pspec/uvpspec.py b/hera_pspec/uvpspec.py
index 8fa3b640..2b3690a8 100644
--- a/hera_pspec/uvpspec.py
+++ b/hera_pspec/uvpspec.py
@@ -362,24 +362,22 @@ def get_integrations(self, key, omit_flags=False):
 
     def get_r_params(self):
         """
-        decompress r_params dictionary (so it can be readily used).
-        in a pspecdata object, r_parms are stored as a dictionary
-        with one entry per baseline.
-        In uvpspec, the dictionary is compressed so that a single r_param entry
-        correspondsto multiple baselines and is stored as a json format string.
-        get_r_params() reads the compressed string and returns the dictionary
-        with the format
-        r_params: dictionary with parameters for weighting matrix.
-            Proper fields
-            and formats depend on the mode of data_weighting.
-            data_weighting == 'dayenu':
-                dictionary with fields
-                'filter_centers', list of floats (or float) specifying the (delay) channel numbers
-                at which to center filtering windows. Can specify fractional channel number.
-                'filter_half_widths', list of floats (or float) specifying the width of each
-                filter window in (delay) channel numbers. Can specify fractional channel number.
-                'filter_factors', list of floats (or float) specifying how much power within each filter window
-                is to be suppressed.
+        Return an `r_params` dictionary.
+
+        In a `PSpecData` object, the `r_params` are stored as a dictionary with
+        one entry per baseline.
+
+        In a `UVPSpec` object, the dictionary is compressed so that a single
+        `r_param` entry corresponds to multiple baselines and is stored as a
+        JSON format string.
+
+        This function reads the compressed string and returns the dictionary
+        with the correct fields and structure.
+
+        Returns
+        -------
+        r_params : dict
+            Dictionary of r_params for this object.
         """
         return uvputils.decompress_r_params(self.r_params)
@@ -1776,13 +1774,15 @@
         where scalar is the cosmological and beam scalar (i.e. X2Y * Omega_eff)
         calculated from pspecbeam with noise_scalar = True, integration_time is
         in seconds and comes from self.integration_array and Nincoherent is the
-        number of incoherent averaging samples and comes from
-        self.nsample_array. If component is 'real' or 'imag', P_N is divided by
-        an additional factor of sqrt(2).
+        number of incoherent averaging samples and comes from `self.nsample_array`.
+        If `component` is `real` or `imag`, P_N is divided by an additional
+        factor of sqrt(2).
 
         If the polarizations specified are pseudo Stokes pol (I, Q, U or V)
         then an extra factor of 2 is divided.
-        If form == 'DelSq' then a factor of |k|^3 / (2pi^2) is multiplied.
+
+        If `form` is `DelSq` then a factor of `|k|^3 / (2pi^2)` is multiplied.
+
         If real is True, a factor of sqrt(2) is divided to account for
         discarding imaginary noise component.
@@ -1822,8 +1822,9 @@ def generate_noise_spectra(self, spw, polpair, Tsys, blpairs=None,
             Number of frequency bins to use in integrating power spectrum
             scalar in pspecbeam. Default: 2000.
 
-        component : str, options=['real', 'imag', 'abs']
-            If component is real or imag, divide by an extra factor of sqrt(2)
+        component : str
+            Options=['real', 'imag', 'abs'].
+            If component is real or imag, divide by an extra factor of sqrt(2).
 
         Returns
        -------
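Note: a rough sketch of generate_noise_spectra() usage as described above (hypothetical values;
the exact polpair and Tsys formats accepted, and the return structure, are assumptions):

    # analytic P_N for spectral window 0, assuming a flat 300 K Tsys
    P_N = uvp.generate_noise_spectra(0, ('xx', 'xx'), 300.0,
                                     form='Pk', component='real')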
@@ -1909,14 +1910,14 @@
 
     def average_spectra(self, blpair_groups=None, time_avg=False,
                         blpair_weights=None, error_field=None,
                         error_weights=None,
-                        inplace=True):
+                        inplace=True, add_to_history=''):
         """
         Average power spectra across the baseline-pair-time axis, weighted by
         each spectrum's integration time.
 
         This is an "incoherent" average, in the sense that this averages power
-        spectra, rather than visibility data. The 'nsample_array' and
-        'integration_array' will be updated to reflect the averaging.
+        spectra, rather than visibility data. The `nsample_array` and
+        `integration_array` will be updated to reflect the averaging.
 
         In the case of averaging across baseline pairs, the resultant averaged
         spectrum is assigned to the zeroth blpair in the group. In the case of
@@ -1929,7 +1930,7 @@
         equivalent to cylindrical binning in 3D k-space.
 
         If you want help constructing baseline-pair groups from baseline
-        groups, see self.get_blpair_groups_from_bl_groups.
+        groups, see `self.get_blpair_groups_from_bl_groups`.
 
         Parameters
         ----------
@@ -1937,6 +1938,7 @@
             List of list of baseline-pair group tuples or integers. All power
             spectra in a baseline-pair group are averaged together. If a
             baseline-pair exists in more than one group, a warning is raised.
+
             Examples::
 
                 blpair_groups = [ [((1, 2), (1, 2)), ((2, 3), (2, 3))],
@@ -1951,6 +1953,7 @@
         blpair_weights : list, optional
             List of float or int weights dictating the relative weight of each
             baseline-pair when performing the average.
+
             This is useful for bootstrapping. This should have the same shape
             as blpair_groups if specified. The weights are automatically
             normalized within each baseline-pair group. Default: None (all
@@ -1965,17 +1968,21 @@
 
         error_weights: string, optional
             error_weights specify which kind of errors we use for weights
-            during averaging power spectra.
-            The weights are defined as $w_i = 1/ sigma_i^2$,
-            where $sigma_i$ is taken from the relevant field of stats_array.
-            If `error_weight' is set to None, which means we just use the
-            integration time as weights. If error_weights is specified,
-            then it also gets appended to error_field as a list.
-            Default: None
+            during averaging power spectra. The weights are defined as
+            $w_i = 1 / sigma_i^2$, where $sigma_i$ is taken from the relevant
+            field of stats_array.
+
+            If `error_weights` is set to `None`, the integration time is used
+            as weights. If `error_weights` is specified, then it also gets
+            appended to `error_field` as a list.
+            Default: None.
 
         inplace : bool, optional
-            If True, edit data in self, else make a copy and return. Default:
-            True.
+            If True, edit data in self, else make a copy and return.
+            Default: True.
+
+        add_to_history : str, optional
+            Text to add to the file history.
 
         Notes
         -----
@@ -1991,14 +1998,14 @@
                                      error_field=error_field,
                                      error_weights=error_weights,
                                      blpair_weights=blpair_weights,
-                                     inplace=True)
+                                     inplace=True, add_to_history=add_to_history)
         else:
             return grouping.average_spectra(self,
                                             blpair_groups=blpair_groups,
                                             time_avg=time_avg,
                                             error_field=error_field,
                                             error_weights=error_weights,
                                             blpair_weights=blpair_weights,
-                                            inplace=False)
+                                            inplace=False, add_to_history=add_to_history)
 
     def fold_spectra(self):
         """
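Note: a closing sketch of the new average_spectra() bookkeeping (hypothetical usage; `uvp` is
assumed to carry a 'P_SN' entry in stats_array for the weighting shown):

    # error-weighted incoherent average with a history note
    blp_groups = uvp.get_blpair_groups_from_bl_groups([[(1, 2), (2, 3)]])
    avg = uvp.average_spectra(blpair_groups=blp_groups, time_avg=True,
                              error_weights='P_SN',  # w_i = 1 / sigma_i^2
                              add_to_history='time + blpair averaged',
                              inplace=False)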