Skip to content

Commit

Permalink
pep8 formatting. Using cholesky to solve (F, p) gives %30 speed up.
Browse files Browse the repository at this point in the history
  • Loading branch information
p-slash committed Mar 29, 2023
1 parent 91e2322 commit 96166a0
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 87 deletions.
166 changes: 107 additions & 59 deletions bin/bootstrapQMLE.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@

import argparse
import sys
from os.path import join as ospath_join, \
getsize as ospath_getsize, dirname as ospath_dir

import glob
from os.path import (
join as ospath_join,
getsize as ospath_getsize,
dirname as ospath_dir
)
import struct
import time
import logging

import numpy as np
from numba import njit
from scipy.linalg import cho_factor, cho_solve

from qsotools.utils import Progress

def getNumbersfromBootfile(fname):
filesize = ospath_getsize(fname)
Expand All @@ -27,34 +27,38 @@ def getNumbersfromBootfile(fname):
Nz = int(struct.unpack('i', f.read(struct.calcsize('i')))[0])
Nd = int(struct.unpack('i', f.read(struct.calcsize('i')))[0])

total_nkz = Nk*Nz
total_nkz = Nk * Nz

if Nd == 3:
cf_size = 3*total_nkz - Nk - 1
cf_size = 3 * total_nkz - Nk - 1
else:
cf_size = int(total_nkz*Nd - (Nd*(Nd-1))/2)
cf_size = int(total_nkz * Nd - (Nd * (Nd - 1)) / 2)

elems_count = cf_size + total_nkz;
one_data_size = struct.calcsize('d'*elems_count)
elems_count = cf_size + total_nkz
one_data_size = struct.calcsize('d' * elems_count)

nspec = int((filesize-3*struct.calcsize('i'))/one_data_size)
nspec = int((filesize - 3 * struct.calcsize('i')) / one_data_size)

return Nk, Nz, Nd, total_nkz, elems_count, nspec

# Returns data
# First Nkz is power, rest is fisher that needs to be reshaped


def readBootFile(fname, N):
dt = np.dtype(('f8', N))

with open(fname, "rb") as bootfile:
spectra = np.fromfile(bootfile, offset=3*struct.calcsize('i'), dtype=dt)
spectra = np.fromfile(
bootfile, offset=3 * struct.calcsize('i'), dtype=dt)

return spectra


@njit("Tuple((f8[:, :], f8[:, :, :]))(f8[:, :], i8, i8, i8, i8)")
def getPSandFisher(v, nk, nd, total_nkz, rem_nz=0):
nboot = v.shape[0]
newsize = total_nkz-rem_nz*nk
newsize = total_nkz - rem_nz * nk
power = v[:, :newsize]
fisher = np.zeros((nboot, newsize, newsize))

Expand All @@ -67,30 +71,47 @@ def getPSandFisher(v, nk, nd, total_nkz, rem_nz=0):

fari = 0
for did in diag_idx:
for jj in range(newsize-did):
fisher[:, jj, jj+did] = farr[:, fari+jj]
fisher[:, jj+did, jj] = farr[:, fari+jj]
for jj in range(newsize - did):
fisher[:, jj, jj + did] = farr[:, fari + jj]
fisher[:, jj + did, jj] = farr[:, fari + jj]

fari += total_nkz-did
fari += total_nkz - did

for jj in range(newsize):
fisher[:, jj, jj] = np.where(fisher[:, jj, jj]==0, 1, fisher[:, jj, jj])
fisher[:, jj, jj] = np.where(
fisher[:, jj, jj] == 0, 1, fisher[:, jj, jj])

return power, fisher


@njit("i8[:, :](i8[:, :])")
def getCounts(booted_indices):
bootnum, no_spectra = booted_indices.shape

counts = np.empty_like(booted_indices)
for b in range(bootnum):
counts[b] = np.bincount(booted_indices[b], minlength=no_spectra)

return counts

def getOneSliceBoot(spectra, booted_indices, nspec,
Nk, Nd, total_nkz, elems_count,
remove_last_nz_bins, nboot_per_it):

def my_cho_solve(fisher, y):
""" Since fisher matrix is positive definite and symmetric,
this function is faster than simple np.linalg.solve
(tested on jupyter notebook)
"""
res = np.empty_like(y)
for jj in range(fisher.shape[0]):
c, low = cho_factor(fisher[jj])
res[jj] = cho_solve((c, low), y[jj])
return res


def getOneSliceBoot(
spectra, booted_indices, nspec,
Nk, Nd, total_nkz, elems_count,
remove_last_nz_bins, nboot_per_it
):
boot_counts = getCounts(booted_indices)
logging.info(" > Generated boot indices.")

Expand All @@ -99,23 +120,54 @@ def getOneSliceBoot(spectra, booted_indices, nspec,
total_data = boot_counts @ spectra

logging.info(" > Calculating bootstrapped inverse Fisher and power...")
total_power_b4, F = getPSandFisher(total_data, Nk, Nd, total_nkz, remove_last_nz_bins)
total_power = 0.5 * np.linalg.solve(F, total_power_b4)
total_power_b4, F = getPSandFisher(
total_data, Nk, Nd, total_nkz, remove_last_nz_bins)
total_power = 0.5 * my_cho_solve(F, total_power_b4)

return total_power

if __name__ == '__main__':

def calculate_original(
args, nspec, spectra, Nk, Nd, total_nkz, outdir
):
# Calculate original
if not args.calculate_originals:
return

logging.info("Calculating original power.")

total_data = np.ones((1, nspec)) @ spectra
total_power_b4, F = getPSandFisher(
total_data, Nk, Nd, total_nkz, args.remove_last_nz_bins)
orig_power = 0.5 * np.linalg.solve(F, total_power_b4)

# Save power to a file
# Set up output file
output_fname = ospath_join(
outdir, f"{args.fbase}bootstrap-original-power.txt")
np.savetxt(output_fname, orig_power)
logging.info(f"Original power saved as {output_fname}.")

output_fname = ospath_join(
outdir, f"{args.fbase}bootstrap-original-fisher.txt")
np.savetxt(output_fname, F[0])
logging.info(f"Original fisher saved as {output_fname}.")


def main():
# Arguments passed to run the script
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("Bootfile", help="File as described in QMLE.")
parser.add_argument("--bootnum", default=10000, type=int,
parser.add_argument(
"--bootnum", default=10000, type=int,
help="Number of bootstrap resamples.")
parser.add_argument("--nboot-per-it", default=10000, type=int,
help="Number of bootstraps to generate per iteration.")
parser.add_argument("--remove-last-nz-bins", default=0, type=int,
help="Number of bootstraps to generate per iteration.")
parser.add_argument(
"--remove-last-nz-bins", default=0, type=int,
help="Remove last nz bins to obtain invertable Fisher.")
parser.add_argument("--seed", help="Seed", default=3422, type=int)
# parser.add_argument("--save-cov", action="store_true")
parser.add_argument("--save-powers", action="store_true")
parser.add_argument("--calculate-originals", action="store_true")
parser.add_argument("--fbase", default="")
Expand All @@ -124,61 +176,57 @@ def getOneSliceBoot(spectra, booted_indices, nspec,
outdir = ospath_dir(args.Bootfile)
if outdir == "":
outdir = "."

if args.fbase and args.fbase[-1] != '-':
args.fbase += '-'

# Set up log
logging.basicConfig(filename=f"{outdir}/bootstrapping.log",
level=logging.DEBUG)
level=logging.DEBUG)
logging.info(" ".join(sys.argv))

Nk, Nz, Nd, total_nkz, elems_count, nspec = getNumbersfromBootfile(args.Bootfile)
logging.info("There are %d subsamples.", nspec)
logging.info("Reading bootstrap dat file.")
Nk, Nz, Nd, total_nkz, elems_count, nspec = getNumbersfromBootfile(
args.Bootfile)
logging.info("There are %d subsamples. Reading bootstrap.dat file.", nspec)
spectra = readBootFile(args.Bootfile, elems_count)

# Calculate original
if args.calculate_originals:
logging.info("Calculating original power.")
total_data = np.ones((1, nspec)) @ spectra
total_power_b4, F = getPSandFisher(total_data, Nk, Nd, total_nkz, args.remove_last_nz_bins)
orig_power = 0.5 * np.linalg.solve(F, total_power_b4)
# Save power to a file
# Set up output file
output_fname = ospath_join(outdir, f"{args.fbase}bootstrap-original-power.txt")
np.savetxt(output_fname, orig_power)
logging.info(f"Original power saved as {output_fname}.")
output_fname = ospath_join(outdir, f"{args.fbase}bootstrap-original-fisher.txt")
np.savetxt(output_fname, F[0])
logging.info(f"Original fisher saved as {output_fname}.")
calculate_original(
args, nspec, spectra, Nk, Nd, total_nkz, outdir)

if args.bootnum <= 0:
logging.info(f"No bootstraps were asked.")
exit()

# Generate bootstrap realizations through indexes
RND = np.random.default_rng(args.seed)
newpowersize = total_nkz-args.remove_last_nz_bins*Nk
newpowersize = total_nkz - args.remove_last_nz_bins * Nk
total_power = np.empty((args.bootnum, newpowersize))
n_iter = int(args.bootnum/args.nboot_per_it)
n_iter = int(args.bootnum / args.nboot_per_it)

for jj in range(n_iter):
logging.info(f"Iteration {jj+1}/{n_iter}.")
i1 = jj*args.nboot_per_it
i2 = i1+args.nboot_per_it
booted_indices = RND.integers(low=0, high=nspec, size=(args.nboot_per_it, nspec))
total_power[i1:i2] = getOneSliceBoot(spectra, booted_indices, nspec,
i1 = jj * args.nboot_per_it
i2 = i1 + args.nboot_per_it
booted_indices = RND.integers(
low=0, high=nspec, size=(args.nboot_per_it, nspec))
total_power[i1:i2] = getOneSliceBoot(
spectra, booted_indices, nspec,
Nk, Nd, total_nkz, elems_count,
args.remove_last_nz_bins, args.nboot_per_it)

bootstrap_cov = np.cov(total_power, rowvar=False)
output_fname = ospath_join(outdir,
f"{args.fbase}bootstrap-cov-n{args.bootnum}-s{args.seed}.txt")
output_fname = ospath_join(
outdir, f"{args.fbase}bootstrap-cov-n{args.bootnum}-s{args.seed}.txt")
np.savetxt(output_fname, bootstrap_cov)
logging.info(f"Covariance saved as {output_fname}.")

if args.save_powers:
output_fname = ospath_join(outdir,
output_fname = ospath_join(
outdir,
f"{args.fbase}bootstrap-power-n{args.bootnum}-s{args.seed}.txt")
np.savetxt(output_fname, total_power)
logging.info(f"Power saved as {output_fname}.")



if __name__ == '__main__':
main()
61 changes: 34 additions & 27 deletions bin/regularizeBootstrapCov.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
import numpy as np
from os.path import dirname as ospath_dir


def normalize(matrix):
fk_v = matrix.diagonal()
norm = np.sqrt(np.outer(fk_v, fk_v))
norm[norm == 0] = 1
return matrix/norm
return matrix / norm


def safe_inverse(matrix):
invmatrix = matrix.copy()
Expand All @@ -19,13 +21,16 @@ def safe_inverse(matrix):

# evecs.T @ cov_boot @ evecs
# Pat actually uses SVD, but that doesn't ensure positive definiteness
# He also uses bootstrap evecs
# Here, I tried to do the same with eigenvalues. This way positive definiteness is satisfied.
# He also uses bootstrap evecs
# Here, I tried to do the same with eigenvalues.
# This way positive definiteness is satisfied.
# My findings: Bootstrap evecs works, chi2 slightly above mean dof
# QMLE evecs also works, chi2 slightly below mean dof


def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True):
#1) Use bootstrap evecs
if use_boot_base_evecs:
# 1) Use bootstrap evecs
evals_boot, evecs_boot = np.linalg.eigh(boot_cov)

s_cov_qmle_bootevecs = np.diag(evecs_boot.T @ qmle_cov @ evecs_boot)
Expand All @@ -34,7 +39,7 @@ def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True):
evals_boot[small_vals] = s_cov_qmle_bootevecs[small_vals]
newcov = evecs_boot @ np.diag(evals_boot) @ evecs_boot.T
else:
#2) Use QMLE evecs
# 2) Use QMLE evecs
evals_qmle, evecs_qmle = np.linalg.eigh(qmle_cov)

s_cov_boot_qmleevecs = np.diag(evecs_qmle.T @ boot_cov @ evecs_qmle)
Expand All @@ -45,16 +50,21 @@ def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True):

return newcov


if __name__ == '__main__':
# Arguments passed to run the script
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--boot-cov", help="Covariance from bootstrap file.", required=True)
parser.add_argument("--qmle-fisher",help="Fisher matrix from QMLE.", required=True)
parser.add_argument("--qmle-sparcity-cut", default=0.001, type=float, \
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"--boot-cov", help="Covariance from bootstrap file.", required=True)
parser.add_argument(
"--qmle-fisher", help="Fisher matrix from QMLE.", required=True)
parser.add_argument(
"--qmle-sparcity-cut", default=0.001, type=float,
help="Sparsity pattern to cut using QMLE Fisher matrix.")
parser.add_argument("--use-qmle-evecs", action="store_true")
parser.add_argument("--iterations", type=int, default=1,
help="Number of iterations")
parser.add_argument("--iterations", type=int, default=500,
help="Number of iterations")
parser.add_argument("--reg-in-cov", action="store_true")
parser.add_argument("--fbase", default="")
args = parser.parse_args()
Expand All @@ -67,9 +77,12 @@ def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True):
normalized_qmle_cov = normalize(qmle_covariance)
normalized_qmle_fisher = normalize(qmle_fisher)

matrix_to_use_for_sparsity = normalized_qmle_cov if args.reg_in_cov else normalized_qmle_fisher
matrix_to_use_for_sparsity = (
normalized_qmle_cov if args.reg_in_cov else normalized_qmle_fisher)

if args.qmle_sparcity_cut > 0:
qmle_sparcity = np.abs(matrix_to_use_for_sparsity) > args.qmle_sparcity_cut
qmle_sparcity = np.abs(
matrix_to_use_for_sparsity) > args.qmle_sparcity_cut
else:
qmle_sparcity = np.ones(qmle_fisher.shape, dtype=bool)

Expand All @@ -79,26 +92,20 @@ def mcdonald_eval_fix(boot_cov, qmle_cov, use_boot_base_evecs=True):
newcov = bootstrap_covariance * qmle_sparcity
else:
boostrap_fisher, _ = safe_inverse(bootstrap_covariance)
newcov, _ = safe_inverse(boostrap_fisher*qmle_sparcity)
newcov, _ = safe_inverse(boostrap_fisher * qmle_sparcity)

newcov = mcdonald_eval_fix(newcov, qmle_covariance, not args.use_qmle_evecs)
diff = np.abs(newcov-bootstrap_covariance)
bootstrap_covariance=newcov
newcov = mcdonald_eval_fix(
newcov, qmle_covariance, not args.use_qmle_evecs)
diff = np.abs(newcov - bootstrap_covariance)
bootstrap_covariance = newcov

if np.all(diff<1e-8):
if np.all(diff < 1e-8):
print("Converged.")
break

for idx in qmle_zero_idx:
bootstrap_covariance[idx, idx] = 0

basis_txt = "qmle" if args.use_qmle_evecs else "boot"
np.savetxt(f"{outdir}/{args.fbase}regularized-bootstrap-cov-{basis_txt}-evecs.txt", bootstrap_covariance)








finalfname = f"{args.fbase}regularized-bootstrap-cov-{basis_txt}-evecs.txt"
np.savetxt(f"{outdir}/{finalfname}", bootstrap_covariance)
Loading

0 comments on commit 96166a0

Please sign in to comment.