diff --git a/docs/conf.py b/docs/conf.py
index 58398988..1bf309d5 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -26,12 +26,13 @@
 # Mock-import modules to allow build to complete without throwing errors
 import mock
 MOCK_MODULES = ['numpy', 'scipy', 'scipy.interpolate', 'scipy.integrate',
-                'pyuvdata', 'h5py', 'aipy', 'omnical', 'linsolve', 'hera_qm',
-                'uvtools', 'uvtools.dspec', 'hera_cal', 'hera_cal.utils', 'healpy',
-                'scikit-learn', 'astropy', 'astropy.cosmology', 'astropy.units',
-                'astropy.constants', 'matplotlib', 'matplotlib.pyplot',
-                'pylab', 'yaml', 'pyuvdata.utils', ]
-
+                'pyuvdata', 'h5py', 'aipy', 'aipy.const', 'omnical', 'linsolve',
+                'hera_qm', 'uvtools', 'uvtools.dspec', 'hera_cal',
+                'hera_cal.utils', 'healpy', 'scikit-learn', 'astropy',
+                'astropy.cosmology', 'astropy.units', 'astropy.constants',
+                'matplotlib', 'matplotlib.pyplot', 'pylab', 'yaml',
+                'pyuvdata.utils', 'hera_sim']
+
 for mod_name in MOCK_MODULES:
     sys.modules[mod_name] = mock.Mock()
diff --git a/hera_pspec/VERSION b/hera_pspec/VERSION
index 0ea3a944..0d91a54c 100644
--- a/hera_pspec/VERSION
+++ b/hera_pspec/VERSION
@@ -1 +1 @@
-0.2.0
+0.3.0
diff --git a/hera_pspec/grouping.py b/hera_pspec/grouping.py
index fd0489ad..e558f71c 100644
--- a/hera_pspec/grouping.py
+++ b/hera_pspec/grouping.py
@@ -825,7 +825,7 @@ def spherical_average(uvp_in, kbins, bin_widths, blpair_groups=None, time_avg=Fa
         # bin covariance: C_sph = H.T C_cyl H
         # cm shape (Npols, Ntimes, Ndlyblps, Ndlyblps)
         cm = np.moveaxis(cov_array_real[spw], -1, 0)
-        cm = Ht @ cm @ H
+        cm = Ht @ cm.clip(-1e40, 1e40) @ H  # clip infs
         cov_array_real[spw] = np.moveaxis(cm, 0, -1)
         cov_array_imag[spw] = np.zeros_like(cov_array_real[spw])
@@ -874,7 +874,7 @@ def spherical_average(uvp_in, kbins, bin_widths, blpair_groups=None, time_avg=Fa
         uvp.lst_2_array = np.unique(uvp_in.lst_2_array)
 
     # Add to history
-    uvp.history += version.history_string(notes=add_to_history)
+    uvp.history = "Spherically averaged with hera_pspec [{}]\n{}\n{}\n{}".format(version.git_hash[:15], add_to_history, '-'*40, uvp.history)
 
     # validity check
     if run_check:
diff --git a/hera_pspec/plot.py b/hera_pspec/plot.py
index 9957f9e8..8c0c69d9 100644
--- a/hera_pspec/plot.py
+++ b/hera_pspec/plot.py
@@ -301,7 +301,7 @@ def delay_waterfall(uvp, blpairs, spw, pol, component='abs-real',
                     deltasq=False, log=True, lst_in_hrs=True,
                     vmin=None, vmax=None, cmap='YlGnBu', axes=None,
                     figsize=(14, 6), force_plot=False, times=None,
-                    title_type='blpair', colorbar=True):
+                    title_type='blpair', colorbar=True, **kwargs):
     """
     Plot a 1D delay spectrum waterfall (or spectra) for a group of baselines.
@@ -374,6 +374,9 @@ def delay_waterfall(uvp, blpairs, spw, pol, component='abs-real',
     colorbar : bool, optional
         Whether to make a colorbar. Default: True
 
+    kwargs : keyword arguments
+        Additional kwargs to pass to ax.matshow()
+
     Returns
     -------
     fig : matplotlib.pyplot.Figure
@@ -558,7 +561,7 @@ def delay_waterfall(uvp, blpairs, spw, pol, component='abs-real',
         # plot waterfall
         cax = ax.matshow(waterfall[key], cmap=cmap, aspect='auto',
                          vmin=vmin, vmax=vmax,
-                         extent=[x[0], x[-1], y[-1], y[0]])
+                         extent=[x[0], x[-1], y[-1], y[0]], **kwargs)
 
         # ax config
         ax.xaxis.set_ticks_position('bottom')
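Note: with the **kwargs passthrough above, any extra ax.matshow() option can now be forwarded
through plot.delay_waterfall(). A minimal sketch (hypothetical usage; `uvp` is assumed to be a
UVPSpec with data loaded and `blps` a list of baseline-pair tuples):

    import matplotlib.colors as colors
    from hera_pspec import plot

    # forward a log color stretch to ax.matshow() via the new **kwargs
    fig = plot.delay_waterfall(uvp, blps, 0, 'xx', component='abs-real',
                               log=False, norm=colors.LogNorm())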
diff --git a/hera_pspec/pspecdata.py b/hera_pspec/pspecdata.py
index 6aca712c..bf0a9332 100644
--- a/hera_pspec/pspecdata.py
+++ b/hera_pspec/pspecdata.py
@@ -892,10 +892,11 @@ def set_R(self, d):
         d : dict
             Dictionary containing data to insert into data-weighting R matrix
             cache. Keys are tuples with the following form
-            key = (dset_index, bl_ant_pair_pol_tuple, data_weighting, taper)
-            Ex: (0, (37, 38, 'xx'), 'bh')
-            If data_weight == 'dayenu' then additional elements are appended
-            key + (filter_extension, spw_Nfreqs, symmetric_taper)
+            `key = (dset_index, bl_ant_pair_pol_tuple, data_weighting, taper)`
+            Example: `(0, (37, 38, 'xx'), 'bh')`
+
+            If data_weight == 'dayenu' then additional elements are appended:
+            `key + (filter_extension, spw_Nfreqs, symmetric_taper)`
         """
         for k in d:
             self._R[k] = d[k]
@@ -1066,22 +1067,30 @@ def set_r_param(self, key, r_params):
 
         Parameters
         ----------
-        key: tuple (dset, bl, [pol]), where dset is the index of the dataset
-            bl is a 2-tuple
-            pol is a float or string specifying polarization
-
-        r_params: dictionary with parameters for weighting matrix.
-            Proper fields
-            and formats depend on the mode of data_weighting.
-            data_weighting == 'dayenu':
-                dictionary with fields
-                'filter_centers', list of floats (or float) specifying the (delay) channel numbers
-                at which to center filtering windows. Can specify fractional channel number.
-                'filter_half_widths', list of floats (or float) specifying the width of each
-                filter window in (delay) channel numbers. Can specify fractional channel number.
-                'filter_factors', list of floats (or float) specifying how much power within each filter window
-                is to be suppressed.
-            Absence of r_params dictionary will result in an error!
+        key: tuple
+            Key in the format: (dset, bl, [pol])
+            `dset` is the index of the dataset, `bl` is a 2-tuple, `pol` is a
+            float or string specifying polarization.
+
+        r_params: dict
+            Dict containing parameters for weighting matrix. Proper fields and
+            formats depend on the mode of data_weighting.
+
+            For `data_weighting` set to `dayenu`, this is a dictionary with the
+            following fields:
+
+            `filter_centers`, list of floats (or float) specifying the (delay)
+            channel numbers at which to center filtering windows. Can specify
+            fractional channel number.
+
+            `filter_half_widths`, list of floats (or float) specifying the
+            width of each filter window in (delay) channel numbers. Can specify
+            fractional channel number.
+
+            `filter_factors`, list of floats (or float) specifying how much
+            power within each filter window is to be suppressed.
+
+            Absence of an `r_params` dictionary will result in an error.
         """
         key = self.parse_blkey(key)
         key = (self.data_weighting,) + key
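Note: a sketch of how the documented dayenu fields fit together (hypothetical values; `ds` is
assumed to be a PSpecData with data_weighting set to 'dayenu'):

    # register r_params for baseline (37, 38), pol 'xx', in dataset 0
    r_params = {'filter_centers': [0.],        # (delay) channel numbers, may be fractional
                'filter_half_widths': [15.5],  # window half-widths in channels
                'filter_factors': [1e-9]}      # power suppression per window
    ds.set_r_param((0, (37, 38), 'xx'), r_params)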
@@ -1594,8 +1603,9 @@ def get_unnormed_E(self, key1, key2, exact_norm=False, pol=False):
 
         return 0.5 * E_matrices
 
-    def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False, pol=False,
-                       time_index=None):
+
+    def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False,
+                       pol=False, time_index=None):
         """
         Calculates the covariance matrix for unnormed bandpowers (i.e., the q
         vectors). If the data were real and x_1 = x_2, the expression would be
@@ -1608,8 +1618,8 @@ def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False, pol=Fa
 
         .. math ::
             E^{12,a} = (1/2) R_1 Q^a R_2
-            C^1 = <x1 x1^\dagger> - <x1><x1^\dagger>
-            C^2 = <x2 x2^\dagger> - <x2><x2^\dagger>
+            C^1 = <x1 x1^\dagger> - <x1><x1^\dagger>
+            C^2 = <x2 x2^\dagger> - <x2><x2^\dagger>
             P^{12} = <x1 x2> - <x1><x2>
             S^{12} = <x1^* x2^*> - <x1^*><x2^*>
@@ -1660,19 +1670,24 @@ def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False, pol=Fa
             Used only if exact_norm is True.
 
         model : string, optional
-            Type of covariance model to calculate, if not cached. Options=['empirical', 'dsets', 'autos']
+            Type of covariance model to calculate, if not cached.
+            Options=['empirical', 'dsets', 'autos']
             How the covariances of the input data should be estimated.
+
             In 'dsets' mode, error bars are estimated from user-provided
-            per baseline and per channel standard deivations.
-            If 'empirical' is set, then error bars are estimated from the data by averaging the
-            channel-channel covariance of each baseline over time and
-            then applying the appropriate linear transformations to these
-            frequency-domain covariances.
-            If 'autos' is set, the covariances of the input data
-            over a baseline is estimated from the autocorrelations of the two antennas over channel bandwidth
-            and integration time.
-
-        time_index : integer, compute covariance at specific time-step
+            per baseline and per channel standard deviations.
+
+            If 'empirical' is set, then error bars are estimated from the data
+            by averaging the channel-channel covariance of each baseline over
+            time and then applying the appropriate linear transformations to
+            these frequency-domain covariances.
+
+            If 'autos' is set, the covariances of the input data over a
+            baseline are estimated from the autocorrelations of the two
+            antennas over channel bandwidth and integration time.
+
+        time_index : int, optional
+            Compute covariance at specific time-step. Default: None.
 
         Returns
         -------
         V : array_like, complex
             Covariance matrix for the unnormed bandpowers
         """
@@ -1683,8 +1698,10 @@ def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False, pol=Fa
         E_matrices = self.get_unnormed_E(key1, key2, exact_norm=exact_norm, pol=pol)
         C1 = self.C_model(key1, model=model, time_index=time_index)
         C2 = self.C_model(key2, model=model, time_index=time_index)
-        P21 = self.cross_covar_model(key2, key1, model=model, conj_1=False, conj_2=False, time_index=time_index)
-        S21 = self.cross_covar_model(key2, key1, model=model, conj_1=True, conj_2=True, time_index=time_index)
+        P21 = self.cross_covar_model(key2, key1, model=model, conj_1=False,
+                                     conj_2=False, time_index=time_index)
+        S21 = self.cross_covar_model(key2, key1, model=model, conj_1=True,
+                                     conj_2=True, time_index=time_index)
 
         E21C1 = np.dot(np.transpose(E_matrices.conj(), (0,2,1)), C1)
         E12C2 = np.dot(E_matrices, C2)
@@ -1695,62 +1712,78 @@ def get_unnormed_V(self, key1, key2, model='empirical', exact_norm=False, pol=Fa
 
         return auto_term + cross_term
 
-    def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False, pol=False, model='empirical', known_cov=None):
+    def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False,
+                                pol=False, model='empirical', known_cov=None):
         """
         Calculates the auto-covariance matrix for both the real and imaginary
         parts of bandpowers (i.e., the q vectors and the p vectors).
 
-        Define
-
-        .. math ::
+        Define:
+
+            Real part of q_a = (1/2) (q_a + q_a^\dagger)
-            Imaginary part of q_a = (1/2i) (q_a - q_a^dagger)
-            Real part of p_a = (1/2) (p_a + p_a^dagger)
-            Imaginary part of p_a = (1/2i) (p_a - p_a^dagger)
-
+            Imaginary part of q_a = (1/2i) (q_a - q_a^\dagger)
+            Real part of p_a = (1/2) (p_a + p_a^\dagger)
+            Imaginary part of p_a = (1/2i) (p_a - p_a^\dagger)
+
+        .. math ::
+
             E^{12,a} = (1/2) R_1 Q^a R_2
-            C^{12} = <x1 x2^\dagger> - <x1><x2^\dagger>
+            C^{12} = <x1 x2^\dagger> - <x1><x2^\dagger>
             P^{12} = <x1 x2> - <x1><x2>
             S^{12} = <x1^* x2^*> - <x1^*><x2^*>
             p_a = M_{ab} q_b
 
-        Then
+        Then:
+
+        The variance of (1/2) (q_a + q_a^\dagger):
+
+        .. math ::
+
+            (1/4){ (<q_a q_b> - <q_a><q_b>) + 2(<q_a q_b^\dagger> - <q_a><q_b^\dagger>)
+            + (<q_a^\dagger q_b^\dagger> - <q_a^\dagger><q_b^\dagger>) }
+
+        The variance of (1/2i) (q_a - q_a^\dagger):
+
+        .. math ::
+
-        The variance of (1/2) (q_a + q_a^dagger):
-            (1/4){ (<q_a q_b> - <q_a><q_b>) + 2(<q_a q_b^dagger> - <q_a><q_b^dagger>)
-            + (<q_a^dagger q_b^dagger> - <q_a^dagger><q_b^dagger>) }
-
-        The variance of (1/2i) (q_a - q_a^dagger) :
-            (-1/4){ (<q_a q_b> - <q_a><q_b>) - 2(<q_a q_b^dagger> - <q_a><q_b^dagger>)
-            + (<q_a^dagger q_b^dagger> - <q_a^dagger><q_b^dagger>) }
-
-        The variance of (1/2) (p_a + p_a^dagger):
-            (1/4) { M_{ab} M_{ac} (<q_b q_c> - <q_b><q_c>) +
-            M_{ab} M_{ac}^* (<q_b q_c^dagger> - <q_b><q_c^dagger>) +
-            M_{ab}^* M_{ac} (<q_b^dagger q_c> - <q_b^dagger><q_c>) +
-            M_{ab}^* M_{ac}^* (<q_b^dagger q_c^dagger> - <q_b^dagger><q_c^dagger>) }
-
-        The variance of (1/2i) (p_a - p_a^dagger):
-            (-1/4) { M_{ab} M_{ac} (<q_b q_c> - <q_b><q_c>) -
-            M_{ab} M_{ac}^* (<q_b q_c^dagger> - <q_b><q_c^dagger>) -
-            M_{ab}^* M_{ac} (<q_b^dagger q_c> - <q_b^dagger><q_c>) +
-            M_{ab}^* M_{ac}^* (<q_b^dagger q_c^dagger> - <q_b^dagger><q_c^dagger>) }
+
+            (-1/4){ (<q_a q_b> - <q_a><q_b>) - 2(<q_a q_b^\dagger> - <q_a><q_b^\dagger>)
+            + (<q_a^\dagger q_b^\dagger> - <q_a^\dagger><q_b^\dagger>) }
+
+        The variance of (1/2) (p_a + p_a^\dagger):
+
+        .. math ::
+
+            (1/4) { M_{ab} M_{ac} (<q_b q_c> - <q_b><q_c>) +
+            M_{ab} M_{ac}^* (<q_b q_c^\dagger> - <q_b><q_c^\dagger>) +
+            M_{ab}^* M_{ac} (<q_b^\dagger q_c> - <q_b^\dagger><q_c>) +
+            M_{ab}^* M_{ac}^* (<q_b^\dagger q_c^\dagger> - <q_b^\dagger><q_c^\dagger>) }
+
+        The variance of (1/2i) (p_a - p_a^\dagger):
+
+        .. math ::
+
+            (-1/4) { M_{ab} M_{ac} (<q_b q_c> - <q_b><q_c>) -
+            M_{ab} M_{ac}^* (<q_b q_c^\dagger> - <q_b><q_c^\dagger>) -
+            M_{ab}^* M_{ac} (<q_b^\dagger q_c> - <q_b^\dagger><q_c>) +
+            M_{ab}^* M_{ac}^* (<q_b^\dagger q_c^\dagger> - <q_b^\dagger><q_c^\dagger>) }
 
         where
-            <q_a q_b> - <q_a><q_b> =
-                tr(E^{12,a} C^{21} E^{12,b} C^{21})
-                + tr(E^{12,a} P^{22} E^{21,b*} S^{11})
-            <q_a q_b^dagger> - <q_a><q_b^dagger> =
-                tr(E^{12,a} C^{22} E^{21,b} C^{11})
-                + tr(E^{12,a} P^{21} E^{12,b *} S^{21})
-            <q_a^dagger q_b^dagger> - <q_a^dagger><q_b^dagger> =
-                tr(E^{21,a} C^{12} E^{21,b} C^{12})
-                + tr(E^{21,a} P^{11} E^{12,b *} S^{22})
+
+        .. math ::
+
+            <q_a q_b> - <q_a><q_b> =
+                tr(E^{12,a} C^{21} E^{12,b} C^{21})
+                + tr(E^{12,a} P^{22} E^{21,b*} S^{11})
+            <q_a q_b^\dagger> - <q_a><q_b^\dagger> =
+                tr(E^{12,a} C^{22} E^{21,b} C^{11})
+                + tr(E^{12,a} P^{21} E^{12,b *} S^{21})
+            <q_a^\dagger q_b^\dagger> - <q_a^\dagger><q_b^\dagger> =
+                tr(E^{21,a} C^{12} E^{21,b} C^{12})
+                + tr(E^{21,a} P^{11} E^{12,b *} S^{22})
 
         Note that
 
         .. math ::
+
             E^{12,a}_{ij}.conj = E^{21,a}_{ji}
 
         This function estimates C^1, C^2, P^{12}, and S^{12} empirically by
@@ -1786,34 +1819,47 @@ def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False, pol=Fals
             Used only if exact_norm is True.
 
         model : string, optional
-            Type of covariance model to use. if not cached. Options=['empirical', 'dsets', 'autos', 'foreground_dependent',
-            (other model names in known_cov)]
-            In 'dsets' mode, error bars are estimated from user-provided
-            per baseline and per channel standard deivations. In 'empirical' mode,
-            error bars are estimated from the data by averaging the
-            channel-channel covariance of each baseline over time and
-            then applying the appropriate linear transformations to these
-            frequency-domain covariances. In 'autos' mode, the covariances of the input data
-            over a baseline is estimated from the autocorrelations of the two antennas forming the baseline
-            across channel bandwidth and integration time.
-            In 'foreground_dependent' mode, it involves using auto-correlation amplitudes to model the input noise covariance
-            and visibility outer products to model the input systematics covariance.
-
-            ############
-            When model is chosen as "autos" or "dsets", only C^{11} and C^{22} are accepted as non-zero values,
-            and the two matrices are also expected to be diagonal,
-            thus only <q_a q_b^dagger> - <q_a><q_b^dagger> = tr[ E^{12,a} C^{22} E^{21,b} C^{11} ] exists
-            in the covariance terms of q vectors.
-            When model is chosen as 'foreground_dependent', we further include the signal-noise coupling term
-            besides the noise in the output covariance. Still only <q_a q_b^dagger> - <q_a><q_b^dagger> is non-zero,
-            while it takes a form of tr[ E^{12,a} Cn^{22} E^{21,b} Cn^{11} +
-            E^{12,a} Cs^{22} E^{21,b} Cn^{11} +
+            Type of covariance model to use, if not cached.
+            Options=['empirical', 'dsets', 'autos', 'foreground_dependent',
+            (other model names in known_cov)].
+
+            In `dsets` mode, error bars are estimated from user-provided
+            per baseline and per channel standard deviations.
+
+            In `empirical` mode, error bars are estimated from the data by
+            averaging the channel-channel covariance of each baseline over time
+            and then applying the appropriate linear transformations to these
+            frequency-domain covariances.
+
+            In `autos` mode, the covariances of the input data over a baseline
+            are estimated from the autocorrelations of the two antennas forming
+            the baseline across channel bandwidth and integration time.
+
+            In `foreground_dependent` mode, auto-correlation amplitudes are
+            used to model the input noise covariance and visibility outer
+            products to model the input systematics covariance.
+
+            When model is chosen as `autos` or `dsets`, only C^{11} and C^{22}
+            are accepted as non-zero values, and the two matrices are also
+            expected to be diagonal, thus only
+            <q_a q_b^\dagger> - <q_a><q_b^\dagger> = tr[ E^{12,a} C^{22} E^{21,b} C^{11} ]
+            exists in the covariance terms of q vectors.
+
+            When model is chosen as `foreground_dependent`, we further include
+            the signal-noise coupling term besides the noise in the output
+            covariance. Still only <q_a q_b^\dagger> - <q_a><q_b^\dagger> is
+            non-zero, while it takes the form
+            tr[ E^{12,a} Cn^{22} E^{21,b} Cn^{11} +
+                E^{12,a} Cs^{22} E^{21,b} Cn^{11} +
+                E^{12,a} Cn^{22} E^{21,b} Cs^{11} ],
+            where Cn is just Cautos, the input noise covariance estimated by
+            the auto-correlation amplitudes (by calling C_model(model='autos')),
+            and Cs uses the outer product of input visibilities to model the
+            covariance on systematics.
+
+            To construct a symmetric and unbiased covariance matrix, we choose
-            Cs^{11}_{ij} = Cs^{22}_{ij} = 1/2 * [ x1_i x2_j^{*} + x2_i x1_j^{*} ], which preserves the property Cs_{ij}^* = Cs_{ji}.
-            #############
+            Cs^{11}_{ij} = Cs^{22}_{ij} = 1/2 * [ x1_i x2_j^{*} + x2_i x1_j^{*} ],
+            which preserves the property Cs_{ij}^* = Cs_{ji}.
 
         known_cov : dicts of covariance matrices
             Covariance matrices that are not calculated internally from data.
@@ -1849,7 +1895,7 @@ def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False, pol=Fals
         cov_q_real, cov_q_imag, cov_p_real, cov_p_imag = [], [], [], []
         for time_index in range(self.dsets[0].Ntimes):
             if model in ['dsets','autos']:
-                # calculate <q_a q_b^dagger> - <q_a><q_b^dagger> = tr[ E^{12,a} C^{22} E^{21,b} C^{11} ]
+                # calculate <q_a q_b^\dagger> - <q_a><q_b^\dagger> = tr[ E^{12,a} C^{22} E^{21,b} C^{11} ]
                 # We have used tr[A D_1 B D_2] = \sum_{ijkm} A_{ij} d_{1j} \delta_{jk} B_{km} d_{2m} \delta_{mi} = \sum_{ik} [A_{ik}*d_{1k}] * [B_{ki}*d_{2i}]
                 # to simplify the computation.
                 C11 = self.C_model(key1, model=model, known_cov=known_cov, time_index=time_index)
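Note: the loop above leans on two identities that can be sanity-checked in isolation — the
diagonal-matrix trace shortcut quoted in the comment, and the fact that the 'ab,cd,bd->ac'
einsum used below is just the covariance transform M C M^T. A standalone numpy sketch (not
part of the PR):

    import numpy as np

    rng = np.random.default_rng(0)
    n = 8
    A, B = rng.normal(size=(n, n)), rng.normal(size=(n, n))
    d1, d2 = rng.normal(size=n), rng.normal(size=n)

    # tr[A D_1 B D_2] = \sum_{ik} [A_{ik} d_{1k}] [B_{ki} d_{2i}] for diagonal D_1, D_2
    full = np.trace(A @ np.diag(d1) @ B @ np.diag(d2))
    fast = np.sum((A * d1) * (B * d2).T)
    assert np.isclose(full, fast)

    # np.einsum('ab,cd,bd->ac', m, m, q) equals m @ q @ m.T
    m = rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n))
    q = rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n))
    assert np.allclose(np.einsum('ab,cd,bd->ac', m, m, q), m @ q @ m.T)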
@@ -1946,8 +1992,8 @@ def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False, pol=Fals
             else:
                 assert np.shape(q_q) == np.shape(m), "covariance matrix and normalization matrix has different shapes."
                 MMq_q = np.einsum('ab,cd,bd->ac', m, m, q_q, optimize=einstein_path_2)
-            # calculate \sum_{bd} [ M_{ab} M_{cd}^* (<q_b q_d^dagger> - <q_b><q_d^dagger>) ]
-            # and \sum_{bd} [ M_{ab}^* M_{cd} (<q_b^dagger q_d> - <q_b^dagger><q_d>) ]
+            # calculate \sum_{bd} [ M_{ab} M_{cd}^* (<q_b q_d^\dagger> - <q_b><q_d^\dagger>) ]
+            # and \sum_{bd} [ M_{ab}^* M_{cd} (<q_b^\dagger q_d> - <q_b^\dagger><q_d>) ]
             if np.isclose([q_qdagger], 0).all():
                 MM_q_qdagger = 0.+1.j*0
                 M_Mq_qdagger_ = 0.+1.j*0
             else:
                 assert np.shape(q_qdagger) == np.shape(m), "covariance matrix and normalization matrix has different shapes."
                 MM_q_qdagger = np.einsum('ab,cd,bd->ac', m, m.conj(), q_qdagger, optimize=einstein_path_2)
                 M_Mq_qdagger_ = np.einsum('ab,cd,bd->ac', m.conj(), m, q_qdagger.conj(), optimize=einstein_path_2)
-            # calculate \sum_{bd} [ M_{ab}^* M_{cd}^* (<q_b^dagger q_d^dagger> - <q_b^dagger><q_d^dagger>) ]
+            # calculate \sum_{bd} [ M_{ab}^* M_{cd}^* (<q_b^\dagger q_d^\dagger> - <q_b^\dagger><q_d^\dagger>) ]
             if np.isclose([qdagger_qdagger], 0).all():
                 M_M_qdagger_qdagger = 0.+1.j*0
             else:
@@ -1989,6 +2035,7 @@ def get_analytic_covariance(self, key1, key2, M=None, exact_norm=False, pol=Fals
 
         return cov_q_real, cov_q_imag, cov_p_real, cov_p_imag
 
+
     def get_MW(self, G, H, mode='I', band_covar=None, exact_norm=False, rcond=1e-15):
         """
         Construct the normalization matrix M and window function matrix W for
@@ -2676,7 +2723,8 @@ def validate_pol(self, dsets, pol_pair):
 
     def pspec(self, bls1, bls2, dsets, pols, n_dlys=None,
               input_data_weight='identity', norm='I', taper='none',
               sampling=False, little_h=True, spw_ranges=None, symmetric_taper=True,
-              baseline_tol=1.0, store_cov=False, store_cov_diag=False, return_q=False, store_window=True, verbose=True,
+              baseline_tol=1.0, store_cov=False, store_cov_diag=False,
+              return_q=False, store_window=True, verbose=True,
               filter_extensions=None, exact_norm=False, history='', r_params=None,
               cov_model='empirical', known_cov=None):
         """
@@ -2771,11 +2819,13 @@ def pspec(self, bls1, bls2, dsets, pols, n_dlys=None,
             in the UVPSpec object.
 
         store_cov_diag : bool, optional
-            If True, store the square root of the diagonal of the output covariance matrix
-            calculated by using get_analytic_covariance(). The error bars will
-            be stored in the form of: sqrt(diag(cov_array_real)) + 1.j*sqrt(diag(cov_array_imag)).
-            It's a way to save the disk space since the whole cov_array data with a size of Ndlys x Ndlys x Ntimes x Nblpairs x Nspws
-            is too large.
+            If True, store the square root of the diagonal of the output
+            covariance matrix calculated by using get_analytic_covariance().
+            The error bars will be stored in the form of:
+            `sqrt(diag(cov_array_real)) + 1.j*sqrt(diag(cov_array_imag))`.
+            This saves disk space, since the full cov_array, with a size of
+            Ndlys x Ndlys x Ntimes x Nblpairs x Nspws, is too large.
 
         return_q : bool, optional
             If True, return the results (delay spectra and covariance
@@ -2786,30 +2836,42 @@ def pspec(self, bls1, bls2, dsets, pols, n_dlys=None,
             Default: True
 
         cov_model : string, optional
-            Type of covariance model to calculate, if not cached. Options=['empirical', 'dsets', 'autos', 'foreground_dependent',
+            Type of covariance model to calculate, if not cached.
+
+            Options=['empirical', 'dsets', 'autos', 'foreground_dependent',
             (other model names in known_cov)]
-            In 'dsets' mode, error bars are estimated from user-provided per baseline and per channel standard deivations.
-            In 'empirical' mode, error bars are estimated from the data by averaging the
-            channel-channel covariance of each baseline over time and
-            then applying the appropriate linear transformations to these
-            frequency-domain covariances.
-            In 'autos' mode, the covariances of the input data
-            over a baseline is estimated from the autocorrelations of the two antennas forming the baseline
-            across channel bandwidth and integration time.
-            In 'foreground_dependent' mode, it involves using auto-correlation amplitudes to model the input noise covariance
-            and visibility outer products to model the input systematics covariance.
+
+            In 'dsets' mode, error bars are estimated from user-provided per
+            baseline and per channel standard deviations.
+
+            In 'empirical' mode, error bars are estimated from the data by
+            averaging the channel-channel covariance of each baseline over
+            time and then applying the appropriate linear transformations to
+            these frequency-domain covariances.
+
+            In 'autos' mode, the covariances of the input data over a baseline
+            are estimated from the autocorrelations of the two antennas forming
+            the baseline across channel bandwidth and integration time.
+
+            In 'foreground_dependent' mode, auto-correlation amplitudes are
+            used to model the input noise covariance and visibility outer
+            products to model the input systematics covariance.
+
             For more details see ds.get_analytic_covariance().
 
         known_cov : dicts of input covariance matrices
-            known_cov has a type {Ckey:covariance}, which is the same with
-            ds._C. The matrices stored in known_cov are constructed
-            outside the PSpecData object, different from those in ds._C which are constructed
+            `known_cov` has a type {Ckey:covariance}, which is the same as
+            ds._C. The matrices stored in known_cov are constructed outside
+            the PSpecData object, unlike those in ds._C which are constructed
             internally.
-            The Ckey should conform to
-            (dset_pair_index, blpair_int, model, time_index, conj_1, conj_2),
-            e.g. ((0, 1), ((25,37,"xx"), (25, 37, "xx")), 'empirical', False, True),
+
+            The Ckey should conform to:
+            `(dset_pair_index, blpair_int, model, time_index, conj_1, conj_2)`,
+            e.g.
+            `((0, 1), ((25,37,"xx"), (25, 37, "xx")), 'empirical', False, True)`,
             while covariance are ndarrays with shape (Nfreqs, Nfreqs).
-            Also see PSpecData.set_C() for more details.
+
+            Also see PSpecData.set_C() for more details.
 
         verbose : bool, optional
             If True, print progress, warnings and debugging info to stdout.
@@ -2824,20 +2886,26 @@ def pspec(self, bls1, bls2, dsets, pols, n_dlys=None,
             False runs two times faster than setting it to True.
 
         history : str, optional
-            history string to attach to UVPSpec object
+            History string to attach to UVPSpec object.
 
         r_params: dictionary with parameters for weighting matrix.
-            Proper fields
-            and formats depend on the mode of data_weighting.
-            data_weighting == 'dayenu':
-                dictionary with fields
-                'filter_centers', list of floats (or float) specifying the (delay) channel numbers
-                at which to center filtering windows. Can specify fractional channel number.
-                'filter_half_widths', list of floats (or float) specifying the width of each
-                filter window in (delay) channel numbers. Can specify fractional channel number.
-                'filter_factors', list of floats (or float) specifying how much power within each filter window
-                is to be suppressed.
-            Absence of r_params dictionary will result in an error!
+            Proper fields and formats depend on the mode of data_weighting.
+
+            For `data_weighting` set to 'dayenu', `r_params` should be a dict
+            with the following fields:
+
+            `filter_centers`, a list of floats (or float) specifying the
+            (delay) channel numbers at which to center filtering windows. Can
+            specify fractional channel number.
+
+            `filter_half_widths`, a list of floats (or float) specifying the
+            width of each filter window in (delay) channel numbers. Can specify
+            fractional channel number.
+
+            `filter_factors`, list of floats (or float) specifying how much
+            power within each filter window is to be suppressed.
+
+            Absence of an `r_params` dictionary will result in an error.
 
         Returns
         -------
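Note: a sketch of a pspec() call exercising the new error-bar options (hypothetical setup;
`ds`, `bls1`, and `bls2` are placeholders, and the argument values are illustrative only):

    # cross-multiply dsets 0 and 1, storing only diagonal errorbars
    uvp = ds.pspec(bls1, bls2, (0, 1), [('xx', 'xx')],
                   spw_ranges=[(300, 400)], taper='bh',
                   cov_model='foreground_dependent',  # noise + signal-noise coupling
                   store_cov_diag=True,               # sqrt(diag(cov)) only, saves disk
                   verbose=False)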
diff --git a/hera_pspec/tests/test_grouping.py b/hera_pspec/tests/test_grouping.py
index 63b63d1e..44837860 100644
--- a/hera_pspec/tests/test_grouping.py
+++ b/hera_pspec/tests/test_grouping.py
@@ -469,6 +469,13 @@ def test_spherical_average():
     # bug check: in the past, combine after spherical average erroneously changed dly_array
     assert sph == sph_c
 
+    # insert an inf into the arrays as a test
+    uvp2 = copy.deepcopy(uvp)
+    uvp2.cov_array_real[0][0], uvp2.cov_array_imag[0][0] = np.inf, np.inf
+    uvp2.stats_array['err'][0][0] = np.inf
+    sph = grouping.spherical_average(uvp2, kbins, bin_widths)
+    assert np.isfinite(sph.cov_array_real[0]).all()
+
     # exceptions
     nt.assert_raises(AssertionError, grouping.spherical_average, uvp, kbins, 1.0)
diff --git a/hera_pspec/tests/test_noise.py b/hera_pspec/tests/test_noise.py
index 46d73970..517bba9d 100644
--- a/hera_pspec/tests/test_noise.py
+++ b/hera_pspec/tests/test_noise.py
@@ -159,22 +159,27 @@ def test_analytic_noise():
     # get P_N estimate
     auto_Tsys = utils.uvd_to_Tsys(uvd, beam, os.path.join(DATA_PATH, "test_uvd.uvh5"))
     assert os.path.exists(os.path.join(DATA_PATH, "test_uvd.uvh5"))
-    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N','P_SN'])
+    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N','P_SN'], P_SN_correction=False)
 
     # check consistency of 1-sigma standard dev. to 1%
     cov_diag = uvp.cov_array_real[0][:, range(Nchan), range(Nchan)]
     stats_diag = uvp.stats_array['P_N'][0]
     frac_ratio = (cov_diag**0.5 - stats_diag) / stats_diag
-
     assert np.abs(frac_ratio).mean() < 0.01
 
     ## check P_SN consistency of 1-sigma standard dev. to 1%
     cov_diag = uvp_fg.cov_array_real[0][:, range(Nchan), range(Nchan)]
     stats_diag = uvp.stats_array['P_SN'][0]
     frac_ratio = (cov_diag**0.5 - stats_diag) / stats_diag
-
     assert np.abs(frac_ratio).mean() < 0.01
 
+    # now compute unbiased P_SN and check that it matches P_N at high-k
+    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N','P_SN'], P_SN_correction=True)
+    frac_ratio = (uvp.stats_array["P_SN"][0] - uvp.stats_array["P_N"][0]) / uvp.stats_array["P_N"][0]
+    dlys = uvp.get_dlys(0) * 1e9
+    select = np.abs(dlys) > 3000
+    assert np.abs(frac_ratio[:, select].mean()) < 1 / np.sqrt(uvp.Nblpairts)
+
     # clean up
     os.remove(os.path.join(DATA_PATH, "test_uvd.uvh5"))
diff --git a/hera_pspec/utils.py b/hera_pspec/utils.py
index 44017975..4c19800f 100644
--- a/hera_pspec/utils.py
+++ b/hera_pspec/utils.py
@@ -1301,7 +1301,7 @@ def uvd_to_Tsys(uvd, beam, Tsys_outfile=None):
     return uvd
 
-def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
+def uvp_noise_error(uvp, auto_Tsys=None, err_type='P_N', precomp_P_N=None, P_SN_correction=True):
     """
     Calculate analytic thermal noise error for a UVPSpec object.
     Adds to uvp.stats_array inplace.
@@ -1313,9 +1313,10 @@ def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
         If err_type == 'P_SN', uvp should not have any incoherent averaging
         applied.
 
-    auto_Tsys : UVData object
+    auto_Tsys : UVData object, optional
         Holds autocorrelation Tsys estimates in Kelvin (see uvd_to_Tsys)
         for all antennas and polarizations involved in uvp power spectra.
+        Needed for P_N computation, not needed if feeding precomp_P_N.
 
     err_type : str or list of str, options = ['P_N', 'P_SN']
         Type of thermal noise error to compute. P_N is the standard
@@ -1326,9 +1327,13 @@ def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
         which uses uses Re[P(tau)] as a proxy for P_S.
        To store both, feed as err_type = ['P_N', 'P_SN']
 
-    precomp_P_N : str
+    precomp_P_N : str, optional
         If computing P_SN and P_N is already computed, use this key to index
         stats_array for P_N rather than computing it from auto_Tsys.
+
+    P_SN_correction : bool, optional
+        Apply correction factor if computing P_SN to account for double
+        counting of noise.
     """
     from hera_pspec import uvpspec_utils
 
     if isinstance(err_type, str):
         err_type = [err_type]
 
-    # get metadata
-    lsts = np.unique(auto_Tsys.lst_array)
-    freqs = auto_Tsys.freq_array[0]
+    # get metadata if needed
+    if precomp_P_N is None:
+        lsts = np.unique(auto_Tsys.lst_array)
+        freqs = auto_Tsys.freq_array[0]
 
     # iterate over spectral window
     for spw in uvp.spw_array:
@@ -1393,10 +1399,47 @@ def uvp_noise_error(uvp, auto_Tsys, err_type='P_N', precomp_P_N=None):
             if 'P_SN' in err_type:
                 # calculate P_SN: see Tan+2020 and
                 # H1C_IDR2/notebooks/validation/errorbars_with_systematics_and_noise.ipynb
+                # get signal proxy
                 P_S = uvp.get_data(key).real
+                # clip negative values
                 P_S[P_S < 0] = 0
                 P_SN = np.sqrt(np.sqrt(2) * P_S * P_N + P_N**2)
                 # catch nans, set to inf
                 P_SN[np.isnan(P_SN)] = np.inf
                 # set stats
                 uvp.set_stats('P_SN', key, P_SN)
+
+    # P_SN correction
+    if P_SN_correction and "P_SN" in err_type:
+        if precomp_P_N is None:
+            precomp_P_N = 'P_N'
+        apply_P_SN_correction(uvp, P_SN='P_SN', P_N=precomp_P_N)
+
+
+def apply_P_SN_correction(uvp, P_SN='P_SN', P_N='P_N'):
+    """
+    Apply correction factor to P_SN errorbar in stats_array to account
+    for double counting of noise by using data as proxy for signal.
+    See Jianrong Tan et al. 2021 (Errorbar methodologies).
+    Operates in place. Must have both P_SN and P_N in stats_array.
+
+    Args:
+        uvp : UVPSpec object
+            With P_SN and P_N errorbar (not variance) as a key in stats_array
+        P_SN : str
+            Key in stats_array for P_SN errorbar
+        P_N : str
+            Key in stats_array for P_N errorbar
+    """
+    assert P_SN in uvp.stats_array
+    assert P_N in uvp.stats_array
+    for spw in uvp.spw_array:
+        # get P_SN and P_N
+        p_n = uvp.stats_array[P_N][spw]
+        p_sn = uvp.stats_array[P_SN][spw]
+        # derive correction
+        corr = 1 - (np.sqrt(1 / np.sqrt(np.pi) + 1) - 1) * p_n.real / p_sn.real.clip(1e-40, np.inf)
+        corr[np.isclose(corr, 0)] = np.inf
+        corr[corr < 0] = np.inf
+        # apply correction
+        uvp.stats_array[P_SN][spw] *= corr
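Note: the intended workflow for the corrected errorbars, sketched under the assumption that
`uvd`, `beam`, and `uvp` already exist (see uvd_to_Tsys above):

    # Tsys from autocorrelations, then P_N and bias-corrected P_SN
    auto_Tsys = utils.uvd_to_Tsys(uvd, beam)
    utils.uvp_noise_error(uvp, auto_Tsys, err_type=['P_N', 'P_SN'],
                          P_SN_correction=True)  # applies apply_P_SN_correction()
    p_sn = uvp.stats_array['P_SN'][0]            # corrected errorbar, spw 0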
diff --git a/hera_pspec/uvpspec.py b/hera_pspec/uvpspec.py
index 8fa3b640..2b3690a8 100644
--- a/hera_pspec/uvpspec.py
+++ b/hera_pspec/uvpspec.py
@@ -362,24 +362,22 @@ def get_integrations(self, key, omit_flags=False):
 
     def get_r_params(self):
         """
-        decompress r_params dictionary (so it can be readily used).
-        in a pspecdata object, r_parms are stored as a dictionary
-        with one entry per baseline.
-        In uvpspec, the dictionary is compressed so that a single r_param entry
-        correspondsto multiple baselines and is stored as a json format string.
-        get_r_params() reads the compressed string and returns the dictionary
-        with the format
-        r_params: dictionary with parameters for weighting matrix.
-            Proper fields
-            and formats depend on the mode of data_weighting.
-            data_weighting == 'dayenu':
-                dictionary with fields
-                'filter_centers', list of floats (or float) specifying the (delay) channel numbers
-                at which to center filtering windows. Can specify fractional channel number.
-                'filter_half_widths', list of floats (or float) specifying the width of each
-                filter window in (delay) channel numbers. Can specify fractional channel number.
-                'filter_factors', list of floats (or float) specifying how much power within each filter window
-                is to be suppressed.
+        Return an `r_params` dictionary.
+
+        In a `PSpecData` object, the `r_params` are stored as a dictionary with
+        one entry per baseline.
+
+        In a `UVPSpec` object, the dictionary is compressed so that a single
+        `r_param` entry corresponds to multiple baselines and is stored as a
+        JSON format string.
+
+        This function reads the compressed string and returns the dictionary
+        with the correct fields and structure.
+
+        Returns
+        -------
+        r_params : dict
+            Dictionary of r_params for this object.
         """
         return uvputils.decompress_r_params(self.r_params)
@@ -1776,13 +1774,15 @@
         where scalar is the cosmological and beam scalar (i.e. X2Y * Omega_eff)
         calculated from pspecbeam with noise_scalar = True, integration_time is
         in seconds and comes from self.integration_array and Nincoherent is the
-        number of incoherent averaging samples and comes from
-        self.nsample_array. If component is 'real' or 'imag', P_N is divided by
-        an additional factor of sqrt(2).
+        number of incoherent averaging samples and comes from `self.nsample_array`.
+        If `component` is `real` or `imag`, P_N is divided by an additional
+        factor of sqrt(2).
 
         If the polarizations specified are pseudo Stokes pol (I, Q, U or V)
         then an extra factor of 2 is divided.
-        If form == 'DelSq' then a factor of |k|^3 / (2pi^2) is multiplied.
+
+        If `form` is `DelSq` then a factor of `|k|^3 / (2pi^2)` is multiplied.
+
         If real is True, a factor of sqrt(2) is divided to account for
         discarding imaginary noise component.
@@ -1822,8 +1822,9 @@ def generate_noise_spectra(self, spw, polpair, Tsys, blpairs=None,
             Number of frequency bins to use in integrating power spectrum
             scalar in pspecbeam. Default: 2000.
 
-        component : str, options=['real', 'imag', 'abs']
-            If component is real or imag, divide by an extra factor of sqrt(2)
+        component : str
+            Options=['real', 'imag', 'abs'].
+            If component is real or imag, divide by an extra factor of sqrt(2).
 
         Returns
        -------
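Note: a rough sketch of generate_noise_spectra() usage as described above (hypothetical values;
the exact polpair and Tsys formats accepted, and the return structure, are assumptions):

    # analytic P_N for spectral window 0, assuming a flat 300 K Tsys
    P_N = uvp.generate_noise_spectra(0, ('xx', 'xx'), 300.0,
                                     form='Pk', component='real')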
@@ -1909,14 +1910,14 @@
 
     def average_spectra(self, blpair_groups=None, time_avg=False,
                         blpair_weights=None, error_field=None,
                         error_weights=None,
-                        inplace=True):
+                        inplace=True, add_to_history=''):
         """
         Average power spectra across the baseline-pair-time axis, weighted by
         each spectrum's integration time.
 
         This is an "incoherent" average, in the sense that this averages power
-        spectra, rather than visibility data. The 'nsample_array' and
-        'integration_array' will be updated to reflect the averaging.
+        spectra, rather than visibility data. The `nsample_array` and
+        `integration_array` will be updated to reflect the averaging.
 
         In the case of averaging across baseline pairs, the resultant averaged
         spectrum is assigned to the zeroth blpair in the group. In the case of
@@ -1929,7 +1930,7 @@
         equivalent to cylindrical binning in 3D k-space.
 
         If you want help constructing baseline-pair groups from baseline
-        groups, see self.get_blpair_groups_from_bl_groups.
+        groups, see `self.get_blpair_groups_from_bl_groups`.
 
         Parameters
         ----------
@@ -1937,6 +1938,7 @@
             List of list of baseline-pair group tuples or integers. All power
             spectra in a baseline-pair group are averaged together. If a
             baseline-pair exists in more than one group, a warning is raised.
+
             Examples::
 
                 blpair_groups = [ [((1, 2), (1, 2)), ((2, 3), (2, 3))],
@@ -1951,6 +1953,7 @@
         blpair_weights : list, optional
             List of float or int weights dictating the relative weight of each
             baseline-pair when performing the average.
+
             This is useful for bootstrapping. This should have the same shape
             as blpair_groups if specified. The weights are automatically
             normalized within each baseline-pair group. Default: None (all
@@ -1965,17 +1968,21 @@
 
         error_weights: string, optional
             error_weights specify which kind of errors we use for weights
-            during averaging power spectra.
-            The weights are defined as $w_i = 1/ sigma_i^2$,
-            where $sigma_i$ is taken from the relevant field of stats_array.
-            If `error_weight' is set to None, which means we just use the
-            integration time as weights. If error_weights is specified,
-            then it also gets appended to error_field as a list.
-            Default: None
+            during averaging power spectra. The weights are defined as
+            $w_i = 1 / sigma_i^2$, where $sigma_i$ is taken from the relevant
+            field of stats_array.
+
+            If `error_weights` is set to `None`, the integration time is used
+            as weights. If `error_weights` is specified, then it also gets
+            appended to `error_field` as a list.
+            Default: None.
 
         inplace : bool, optional
-            If True, edit data in self, else make a copy and return. Default:
-            True.
+            If True, edit data in self, else make a copy and return.
+            Default: True.
+
+        add_to_history : str, optional
+            Text to add to the file history.
 
         Notes
         -----
@@ -1991,14 +1998,14 @@
                                      error_field=error_field,
                                      error_weights=error_weights,
                                      blpair_weights=blpair_weights,
-                                     inplace=True)
+                                     inplace=True, add_to_history=add_to_history)
         else:
             return grouping.average_spectra(self,
                                             blpair_groups=blpair_groups,
                                             time_avg=time_avg,
                                             error_field=error_field,
                                             error_weights=error_weights,
                                             blpair_weights=blpair_weights,
-                                            inplace=False)
+                                            inplace=False, add_to_history=add_to_history)
 
     def fold_spectra(self):
         """
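Note: a closing sketch of the new average_spectra() bookkeeping (hypothetical usage; `uvp` is
assumed to carry a 'P_SN' entry in stats_array for the weighting shown):

    # error-weighted incoherent average with a history note
    blp_groups = uvp.get_blpair_groups_from_bl_groups([[(1, 2), (2, 3)]])
    avg = uvp.average_spectra(blpair_groups=blp_groups, time_avg=True,
                              error_weights='P_SN',  # w_i = 1 / sigma_i^2
                              add_to_history='time + blpair averaged',
                              inplace=False)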