diff --git a/bin/bootstrapQMLE.py b/bin/bootstrapQMLE.py index 5e970cd..84d1900 100755 --- a/bin/bootstrapQMLE.py +++ b/bin/bootstrapQMLE.py @@ -6,18 +6,18 @@ import argparse import sys -from os.path import join as ospath_join, \ - getsize as ospath_getsize, dirname as ospath_dir - -import glob +from os.path import ( + join as ospath_join, + getsize as ospath_getsize, + dirname as ospath_dir +) import struct -import time import logging import numpy as np from numba import njit +from scipy.linalg import cho_factor, cho_solve -from qsotools.utils import Progress def getNumbersfromBootfile(fname): filesize = ospath_getsize(fname) @@ -27,34 +27,38 @@ def getNumbersfromBootfile(fname): Nz = int(struct.unpack('i', f.read(struct.calcsize('i')))[0]) Nd = int(struct.unpack('i', f.read(struct.calcsize('i')))[0]) - total_nkz = Nk*Nz + total_nkz = Nk * Nz if Nd == 3: - cf_size = 3*total_nkz - Nk - 1 + cf_size = 3 * total_nkz - Nk - 1 else: - cf_size = int(total_nkz*Nd - (Nd*(Nd-1))/2) + cf_size = int(total_nkz * Nd - (Nd * (Nd - 1)) / 2) - elems_count = cf_size + total_nkz; - one_data_size = struct.calcsize('d'*elems_count) + elems_count = cf_size + total_nkz + one_data_size = struct.calcsize('d' * elems_count) - nspec = int((filesize-3*struct.calcsize('i'))/one_data_size) + nspec = int((filesize - 3 * struct.calcsize('i')) / one_data_size) return Nk, Nz, Nd, total_nkz, elems_count, nspec # Returns data # First Nkz is power, rest is fisher that needs to be reshaped + + def readBootFile(fname, N): dt = np.dtype(('f8', N)) with open(fname, "rb") as bootfile: - spectra = np.fromfile(bootfile, offset=3*struct.calcsize('i'), dtype=dt) + spectra = np.fromfile( + bootfile, offset=3 * struct.calcsize('i'), dtype=dt) return spectra + @njit("Tuple((f8[:, :], f8[:, :, :]))(f8[:, :], i8, i8, i8, i8)") def getPSandFisher(v, nk, nd, total_nkz, rem_nz=0): nboot = v.shape[0] - newsize = total_nkz-rem_nz*nk + newsize = total_nkz - rem_nz * nk power = v[:, :newsize] fisher = np.zeros((nboot, newsize, newsize)) @@ -67,30 +71,47 @@ def getPSandFisher(v, nk, nd, total_nkz, rem_nz=0): fari = 0 for did in diag_idx: - for jj in range(newsize-did): - fisher[:, jj, jj+did] = farr[:, fari+jj] - fisher[:, jj+did, jj] = farr[:, fari+jj] + for jj in range(newsize - did): + fisher[:, jj, jj + did] = farr[:, fari + jj] + fisher[:, jj + did, jj] = farr[:, fari + jj] - fari += total_nkz-did + fari += total_nkz - did for jj in range(newsize): - fisher[:, jj, jj] = np.where(fisher[:, jj, jj]==0, 1, fisher[:, jj, jj]) + fisher[:, jj, jj] = np.where( + fisher[:, jj, jj] == 0, 1, fisher[:, jj, jj]) return power, fisher + @njit("i8[:, :](i8[:, :])") def getCounts(booted_indices): bootnum, no_spectra = booted_indices.shape - + counts = np.empty_like(booted_indices) for b in range(bootnum): counts[b] = np.bincount(booted_indices[b], minlength=no_spectra) return counts -def getOneSliceBoot(spectra, booted_indices, nspec, - Nk, Nd, total_nkz, elems_count, - remove_last_nz_bins, nboot_per_it): + +def my_cho_solve(fisher, y): + """ Since fisher matrix is positive definite and symmetric, + this function is faster than simple np.linalg.solve + (tested on jupyter notebook) + """ + res = np.empty_like(y) + for jj in range(fisher.shape[0]): + c, low = cho_factor(fisher[jj]) + res[jj] = cho_solve((c, low), y[jj]) + return res + + +def getOneSliceBoot( + spectra, booted_indices, nspec, + Nk, Nd, total_nkz, elems_count, + remove_last_nz_bins, nboot_per_it +): boot_counts = getCounts(booted_indices) logging.info(" > Generated boot indices.") @@ -99,23 +120,54 @@ def getOneSliceBoot(spectra, booted_indices, nspec, total_data = boot_counts @ spectra logging.info(" > Calculating bootstrapped inverse Fisher and power...") - total_power_b4, F = getPSandFisher(total_data, Nk, Nd, total_nkz, remove_last_nz_bins) - total_power = 0.5 * np.linalg.solve(F, total_power_b4) + total_power_b4, F = getPSandFisher( + total_data, Nk, Nd, total_nkz, remove_last_nz_bins) + total_power = 0.5 * my_cho_solve(F, total_power_b4) return total_power -if __name__ == '__main__': + +def calculate_original( + args, nspec, spectra, Nk, Nd, total_nkz, outdir +): + # Calculate original + if not args.calculate_originals: + return + + logging.info("Calculating original power.") + + total_data = np.ones((1, nspec)) @ spectra + total_power_b4, F = getPSandFisher( + total_data, Nk, Nd, total_nkz, args.remove_last_nz_bins) + orig_power = 0.5 * np.linalg.solve(F, total_power_b4) + + # Save power to a file + # Set up output file + output_fname = ospath_join( + outdir, f"{args.fbase}bootstrap-original-power.txt") + np.savetxt(output_fname, orig_power) + logging.info(f"Original power saved as {output_fname}.") + + output_fname = ospath_join( + outdir, f"{args.fbase}bootstrap-original-fisher.txt") + np.savetxt(output_fname, F[0]) + logging.info(f"Original fisher saved as {output_fname}.") + + +def main(): # Arguments passed to run the script - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("Bootfile", help="File as described in QMLE.") - parser.add_argument("--bootnum", default=10000, type=int, + parser.add_argument( + "--bootnum", default=10000, type=int, help="Number of bootstrap resamples.") parser.add_argument("--nboot-per-it", default=10000, type=int, - help="Number of bootstraps to generate per iteration.") - parser.add_argument("--remove-last-nz-bins", default=0, type=int, + help="Number of bootstraps to generate per iteration.") + parser.add_argument( + "--remove-last-nz-bins", default=0, type=int, help="Remove last nz bins to obtain invertable Fisher.") parser.add_argument("--seed", help="Seed", default=3422, type=int) - # parser.add_argument("--save-cov", action="store_true") parser.add_argument("--save-powers", action="store_true") parser.add_argument("--calculate-originals", action="store_true") parser.add_argument("--fbase", default="") @@ -124,30 +176,22 @@ def getOneSliceBoot(spectra, booted_indices, nspec, outdir = ospath_dir(args.Bootfile) if outdir == "": outdir = "." + + if args.fbase and args.fbase[-1] != '-': + args.fbase += '-' + # Set up log logging.basicConfig(filename=f"{outdir}/bootstrapping.log", - level=logging.DEBUG) + level=logging.DEBUG) logging.info(" ".join(sys.argv)) - Nk, Nz, Nd, total_nkz, elems_count, nspec = getNumbersfromBootfile(args.Bootfile) - logging.info("There are %d subsamples.", nspec) - logging.info("Reading bootstrap dat file.") + Nk, Nz, Nd, total_nkz, elems_count, nspec = getNumbersfromBootfile( + args.Bootfile) + logging.info("There are %d subsamples. Reading bootstrap.dat file.", nspec) spectra = readBootFile(args.Bootfile, elems_count) - # Calculate original - if args.calculate_originals: - logging.info("Calculating original power.") - total_data = np.ones((1, nspec)) @ spectra - total_power_b4, F = getPSandFisher(total_data, Nk, Nd, total_nkz, args.remove_last_nz_bins) - orig_power = 0.5 * np.linalg.solve(F, total_power_b4) - # Save power to a file - # Set up output file - output_fname = ospath_join(outdir, f"{args.fbase}bootstrap-original-power.txt") - np.savetxt(output_fname, orig_power) - logging.info(f"Original power saved as {output_fname}.") - output_fname = ospath_join(outdir, f"{args.fbase}bootstrap-original-fisher.txt") - np.savetxt(output_fname, F[0]) - logging.info(f"Original fisher saved as {output_fname}.") + calculate_original( + args, nspec, spectra, Nk, Nd, total_nkz, outdir) if args.bootnum <= 0: logging.info(f"No bootstraps were asked.") @@ -155,30 +199,34 @@ def getOneSliceBoot(spectra, booted_indices, nspec, # Generate bootstrap realizations through indexes RND = np.random.default_rng(args.seed) - newpowersize = total_nkz-args.remove_last_nz_bins*Nk + newpowersize = total_nkz - args.remove_last_nz_bins * Nk total_power = np.empty((args.bootnum, newpowersize)) - n_iter = int(args.bootnum/args.nboot_per_it) + n_iter = int(args.bootnum / args.nboot_per_it) for jj in range(n_iter): logging.info(f"Iteration {jj+1}/{n_iter}.") - i1 = jj*args.nboot_per_it - i2 = i1+args.nboot_per_it - booted_indices = RND.integers(low=0, high=nspec, size=(args.nboot_per_it, nspec)) - total_power[i1:i2] = getOneSliceBoot(spectra, booted_indices, nspec, + i1 = jj * args.nboot_per_it + i2 = i1 + args.nboot_per_it + booted_indices = RND.integers( + low=0, high=nspec, size=(args.nboot_per_it, nspec)) + total_power[i1:i2] = getOneSliceBoot( + spectra, booted_indices, nspec, Nk, Nd, total_nkz, elems_count, args.remove_last_nz_bins, args.nboot_per_it) bootstrap_cov = np.cov(total_power, rowvar=False) - output_fname = ospath_join(outdir, - f"{args.fbase}bootstrap-cov-n{args.bootnum}-s{args.seed}.txt") + output_fname = ospath_join( + outdir, f"{args.fbase}bootstrap-cov-n{args.bootnum}-s{args.seed}.txt") np.savetxt(output_fname, bootstrap_cov) logging.info(f"Covariance saved as {output_fname}.") if args.save_powers: - output_fname = ospath_join(outdir, + output_fname = ospath_join( + outdir, f"{args.fbase}bootstrap-power-n{args.bootnum}-s{args.seed}.txt") np.savetxt(output_fname, total_power) logging.info(f"Power saved as {output_fname}.") - +if __name__ == '__main__': + main() diff --git a/bin/regularizeBootstrapCov.py b/bin/regularizeBootstrapCov.py index a1fcc65..905aad2 100644 --- a/bin/regularizeBootstrapCov.py +++ b/bin/regularizeBootstrapCov.py @@ -2,11 +2,13 @@ import numpy as np from os.path import dirname as ospath_dir + def normalize(matrix): fk_v = matrix.diagonal() norm = np.sqrt(np.outer(fk_v, fk_v)) norm[norm == 0] = 1 - return matrix/norm + return matrix / norm + def safe_inverse(matrix): invmatrix = matrix.copy() @@ -19,13 +21,16 @@ def safe_inverse(matrix): # evecs.T @ cov_boot @ evecs # Pat actually uses SVD, but that doesn't ensure positive definiteness -# He also uses bootstrap evecs -# Here, I tried to do the same with eigenvalues. This way positive definiteness is satisfied. +# He also uses bootstrap evecs +# Here, I tried to do the same with eigenvalues. +# This way positive definiteness is satisfied. # My findings: Bootstrap evecs works, chi2 slightly above mean dof # QMLE evecs also works, chi2 slightly below mean dof + + def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True): - #1) Use bootstrap evecs if use_boot_base_evecs: + # 1) Use bootstrap evecs evals_boot, evecs_boot = np.linalg.eigh(boot_cov) s_cov_qmle_bootevecs = np.diag(evecs_boot.T @ qmle_cov @ evecs_boot) @@ -34,7 +39,7 @@ def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True): evals_boot[small_vals] = s_cov_qmle_bootevecs[small_vals] newcov = evecs_boot @ np.diag(evals_boot) @ evecs_boot.T else: - #2) Use QMLE evecs + # 2) Use QMLE evecs evals_qmle, evecs_qmle = np.linalg.eigh(qmle_cov) s_cov_boot_qmleevecs = np.diag(evecs_qmle.T @ boot_cov @ evecs_qmle) @@ -45,16 +50,21 @@ def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True): return newcov + if __name__ == '__main__': # Arguments passed to run the script - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--boot-cov", help="Covariance from bootstrap file.", required=True) - parser.add_argument("--qmle-fisher",help="Fisher matrix from QMLE.", required=True) - parser.add_argument("--qmle-sparcity-cut", default=0.001, type=float, \ + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--boot-cov", help="Covariance from bootstrap file.", required=True) + parser.add_argument( + "--qmle-fisher", help="Fisher matrix from QMLE.", required=True) + parser.add_argument( + "--qmle-sparcity-cut", default=0.001, type=float, help="Sparsity pattern to cut using QMLE Fisher matrix.") parser.add_argument("--use-qmle-evecs", action="store_true") - parser.add_argument("--iterations", type=int, default=1, - help="Number of iterations") + parser.add_argument("--iterations", type=int, default=500, + help="Number of iterations") parser.add_argument("--reg-in-cov", action="store_true") parser.add_argument("--fbase", default="") args = parser.parse_args() @@ -67,9 +77,12 @@ def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True): normalized_qmle_cov = normalize(qmle_covariance) normalized_qmle_fisher = normalize(qmle_fisher) - matrix_to_use_for_sparsity = normalized_qmle_cov if args.reg_in_cov else normalized_qmle_fisher + matrix_to_use_for_sparsity = ( + normalized_qmle_cov if args.reg_in_cov else normalized_qmle_fisher) + if args.qmle_sparcity_cut > 0: - qmle_sparcity = np.abs(matrix_to_use_for_sparsity) > args.qmle_sparcity_cut + qmle_sparcity = np.abs( + matrix_to_use_for_sparsity) > args.qmle_sparcity_cut else: qmle_sparcity = np.ones(qmle_fisher.shape, dtype=bool) @@ -79,13 +92,14 @@ def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True): newcov = bootstrap_covariance * qmle_sparcity else: boostrap_fisher, _ = safe_inverse(bootstrap_covariance) - newcov, _ = safe_inverse(boostrap_fisher*qmle_sparcity) + newcov, _ = safe_inverse(boostrap_fisher * qmle_sparcity) - newcov = mcdonald_eval_fix(newcov, qmle_covariance, not args.use_qmle_evecs) - diff = np.abs(newcov-bootstrap_covariance) - bootstrap_covariance=newcov + newcov = mcdonald_eval_fix( + newcov, qmle_covariance, not args.use_qmle_evecs) + diff = np.abs(newcov - bootstrap_covariance) + bootstrap_covariance = newcov - if np.all(diff<1e-8): + if np.all(diff < 1e-8): print("Converged.") break @@ -93,12 +107,5 @@ def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True): bootstrap_covariance[idx, idx] = 0 basis_txt = "qmle" if args.use_qmle_evecs else "boot" - np.savetxt(f"{outdir}/{args.fbase}regularized-bootstrap-cov-{basis_txt}-evecs.txt", bootstrap_covariance) - - - - - - - - + finalfname = f"{args.fbase}regularized-bootstrap-cov-{basis_txt}-evecs.txt" + np.savetxt(f"{outdir}/{finalfname}", bootstrap_covariance) diff --git a/setup.py b/setup.py index 46cf2eb..91bdae0 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="qsotools", - version="2.5.2", + version="2.5.3", packages=find_namespace_packages(where='py'), package_dir={'': 'py/'}, scripts=binscripts,