From 6ea5c6cc4494487a88f2e3a90f3a460a99cca24e Mon Sep 17 00:00:00 2001 From: puzhichen <147788878+puzhichen@users.noreply.github.com> Date: Mon, 20 Jan 2025 23:13:36 +0800 Subject: [PATCH] Davidson iterations for tddft on GPU (#305) * some mmodifications * test * finish writting, start debugging * Finish debugging and unit tests. * remove some comments and unused codes * after review the codes * change the threshold in precond * add the import _response_functions * change codes according to review comments --- gpu4pyscf/tdscf/_lr_eig.py | 209 +++++++++++++++++----------- gpu4pyscf/tdscf/rhf.py | 35 +++-- gpu4pyscf/tdscf/rks.py | 7 +- gpu4pyscf/tdscf/tests/test_tdrhf.py | 8 +- gpu4pyscf/tdscf/tests/test_tdrks.py | 6 +- gpu4pyscf/tdscf/tests/test_tduhf.py | 4 +- gpu4pyscf/tdscf/tests/test_tduks.py | 6 +- gpu4pyscf/tdscf/uhf.py | 22 +-- gpu4pyscf/tdscf/uks.py | 8 +- 9 files changed, 178 insertions(+), 127 deletions(-) diff --git a/gpu4pyscf/tdscf/_lr_eig.py b/gpu4pyscf/tdscf/_lr_eig.py index 880732e0..ae72899f 100644 --- a/gpu4pyscf/tdscf/_lr_eig.py +++ b/gpu4pyscf/tdscf/_lr_eig.py @@ -17,9 +17,11 @@ functions are provided in future PySCF release. ''' +import cupy as cp import sys import numpy as np import scipy.linalg +import cupyx.scipy.linalg from pyscf.lib.parameters import MAX_MEMORY from pyscf.lib import logger from pyscf.lib.linalg_helper import _sort_elast, _outprod_to_subspace @@ -85,7 +87,8 @@ def eigh(aop, x0, precond, tol_residual=1e-5, lindep=1e-12, nroots=1, if isinstance(x0, np.ndarray) and x0.ndim == 1: x0 = x0[None,:] - x0 = np.asarray(x0) + + x0 = cp.asarray(x0) x0_size = x0.shape[1] if MAX_SPACE_INC is None: @@ -101,15 +104,15 @@ def eigh(aop, x0, precond, tol_residual=1e-5, lindep=1e-12, nroots=1, max_space = min(max_space, x0_size) log.debug(f'Set max_space {max_space}, space_inc {space_inc}') - xs = np.zeros((0, x0_size)) - ax = np.zeros((0, x0_size)) + xs = cp.zeros((0, x0_size)) + ax = cp.zeros((0, x0_size)) e = w = v = None - conv_last = conv = np.zeros(nroots, dtype=bool) + conv_last = conv = cp.zeros(nroots, dtype=bool) xt = x0 if x0sym is not None: - xt_ir = np.asarray(x0sym) - xs_ir = np.array([], dtype=xt_ir.dtype) + xt_ir = cp.asarray(x0sym) + xs_ir = cp.array([], dtype=xt_ir.dtype) for icyc in range(max_cycle): xt, xt_idx = _qr(xt, lindep) @@ -119,39 +122,46 @@ def eigh(aop, x0, precond, tol_residual=1e-5, lindep=1e-12, nroots=1, row0 = len(xs) axt = aop(xt) - xs = np.vstack([xs, xt]) - ax = np.vstack([ax, axt]) + xs = cp.vstack([xs, xt]) + ax = cp.vstack([ax, axt]) + axt = None if x0sym is not None: - xs_ir = np.hstack([xs_ir, xt_ir[xt_idx]]) + xs_ir = cp.hstack([xs_ir, xt_ir[xt_idx]]) # Compute heff = xs.conj().dot(ax.T) if w is None: heff = xs.conj().dot(ax.T) else: hsub = xt.conj().dot(ax.T) - heff = np.block([[np.diag(w), hsub[:,:row0].conj().T], - [hsub[:,:row0], hsub[:,row0:]]]) + heff = cp.empty((len(xs), len(xs)), dtype=hsub.dtype) + heff[:row0,:row0] = cp.diag(w) + heff[:row0,row0:] = hsub[:,:row0].conj().T + heff[row0:,:row0] = hsub[:,:row0] + heff[row0:,row0:] = hsub[:,row0:] + hsub = None + xt = None if x0sym is None: - w, v = scipy.linalg.eigh(heff) + w, v = cp.linalg.eigh(heff) else: # Diagonalize within eash symmetry sectors row1 = len(xs) - w = np.empty(row1) - v = np.zeros((row1, row1)) + w = cp.empty(row1) + v = cp.zeros((row1, row1)) v_ir = [] i1 = 0 for ir in set(xs_ir): - idx = np.where(xs_ir == ir)[0] + idx = cp.where(xs_ir == ir)[0] i0, i1 = i1, i1 + idx.size - w_sub, v_sub = scipy.linalg.eigh(heff[idx[:,None],idx]) + w_sub, v_sub = cp.linalg.eigh(heff[idx[:,None],idx]) w[i0:i1] = w_sub v[idx,i0:i1] = v_sub v_ir.append([ir] * idx.size) - w_idx = np.argsort(w) + w_idx = cp.argsort(w) w = w[w_idx] v = v[:,w_idx] - xs_ir = np.hstack(v_ir)[w_idx] + xs_ir = cp.hstack(v_ir)[w_idx] + heff = None w, v, idx = pick(w, v, nroots, locals()) if x0sym is not None: @@ -168,8 +178,8 @@ def eigh(aop, x0, precond, tol_residual=1e-5, lindep=1e-12, nroots=1, de = e else: # mapping to previous eigenvectors - vlast = np.eye(nroots) - elast, conv_last = _sort_elast(elast, conv, vlast, + vlast = cp.eye(nroots) + elast, conv_last = _sort_elast_gpu(elast, conv, vlast, v[:nroots,:nroots], log) de = e - elast @@ -189,7 +199,7 @@ def eigh(aop, x0, precond, tol_residual=1e-5, lindep=1e-12, nroots=1, if x0sym is not None: xt_ir = xs_ir[:t_size] - dx_norm = np.linalg.norm(xt, axis=1) + dx_norm = cp.linalg.norm(xt, axis=1) max_dx_norm = max(dx_norm[:nroots]) conv = dx_norm[:nroots] < tol_residual for k, ek in enumerate(e[:nroots]): @@ -199,18 +209,17 @@ def eigh(aop, x0, precond, tol_residual=1e-5, lindep=1e-12, nroots=1, else: log.debug1('root %d |r|= %4.3g e= %s max|de|= %4.3g', k, dx_norm[k], ek, de[k]) - ide = np.argmax(abs(de)) + ide = cp.argmax(abs(de)) if all(conv): log.debug('converged %d %d |r|= %4.3g e= %s max|de|= %4.3g', icyc, len(xs), max_dx_norm, e, de[ide]) break # remove subspace linear dependency - for k, xk in enumerate(xt): - if dx_norm[k] > tol_residual: - xt[k] = precond(xk, e[0]) + r_index = dx_norm > tol_residual + xt[r_index] = precond(xt[r_index], w[:t_size][r_index]) xt -= xs.conj().dot(xt.T).T.dot(xs) - xt_norm = np.linalg.norm(xt, axis=1) + xt_norm = cp.linalg.norm(xt, axis=1) remaining = [] for k, xk in enumerate(xt): @@ -236,7 +245,7 @@ def eigh(aop, x0, precond, tol_residual=1e-5, lindep=1e-12, nroots=1, if len(x0) < min(x0_size, nroots): log.warn(f'Not enough eigenvectors (len(x0)={len(x0)}, nroots={nroots})') - return conv, e, x0 + return conv, e.get(), x0.get() def eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=None, max_cycle=50, max_memory=MAX_MEMORY, lindep=1e-12, verbose=logger.WARN): @@ -541,8 +550,8 @@ def real_eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=Non log = verbose else: log = logger.Logger(sys.stdout, verbose) - assert x0.ndim == 2 + x0 = cp.asarray(x0) A_size = x0.shape[1] // 2 V = x0[:,:A_size] W = x0[:,A_size:] @@ -562,23 +571,23 @@ def real_eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=Non log.debug(f'Set max_space {max_space}, space_inc {space_inc}') if x0sym is not None: - x0_ir = np.asarray(x0sym) + x0_ir = cp.asarray(x0sym) '''U1 = AV + BW U2 = AW + BV''' - V_holder = np.empty((A_size, max_space), order='F') - W_holder = np.empty_like(V_holder) - U1_holder = np.empty_like(V_holder) - U2_holder = np.empty_like(V_holder) - - a = np.empty((max_space*2,max_space*2)) - b = np.empty_like(a) - sigma = np.empty_like(a) - pi = np.empty_like(a) + V_holder = cp.empty((A_size, max_space), order='F') + W_holder = cp.empty_like(V_holder) + U1_holder = cp.empty_like(V_holder) + U2_holder = cp.empty_like(V_holder) + + a = cp.empty((max_space*2,max_space*2)) + b = cp.empty_like(a) + sigma = cp.empty_like(a) + pi = cp.empty_like(a) e = None v_sub = None vlast = None - conv_last = conv = np.zeros(nroots, dtype=bool) + conv_last = conv = cp.zeros(nroots, dtype=bool) fresh_start = True for icyc in range(max_cycle): @@ -588,7 +597,7 @@ def real_eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=Non if x0sym is not None: xs_ir = xt_ir = x0_ir - axt = aop(np.hstack([V, W])) + axt = aop(cp.hstack([V, W])) U1 = axt[:,:A_size] U2 = -axt[:,A_size:] m0, m1 = m1, m1+len(U1) @@ -633,13 +642,13 @@ def real_eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=Non a[:m1,:m1], b[:m1,:m1], sigma[:m1,:m1], pi[:m1,:m1], space_inc) else: # Diagonalize within eash symmetry sectors - omega = np.empty(m1) - x = np.zeros((m1, m1)) - y = np.zeros_like(x) + omega = cp.empty(m1) + x = cp.zeros((m1, m1)) + y = cp.zeros_like(x) v_ir = [] i1 = 0 for ir in set(xs_ir): - idx = np.nonzero(xs_ir[:m1] == ir)[0] + idx = cp.nonzero(xs_ir[:m1] == ir)[0] _w, _x, _y = TDDFT_subspace_eigen_solver( a[idx[:,None],idx], b[idx[:,None],idx], sigma[idx[:,None],idx], pi[idx[:,None],idx], idx.size) @@ -648,16 +657,16 @@ def real_eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=Non x[idx,i0:i1] = _x y[idx,i0:i1] = _y v_ir.append([ir] * _w.size) - idx = np.argsort(omega) + idx = cp.argsort(omega) omega = omega[idx] - v_ir = np.hstack(v_ir)[idx] + v_ir = cp.hstack(v_ir)[idx] x = x[:,idx] y = y[:,idx] w, e, elast = omega[:space_inc], omega[:nroots], e v_sub = x[:,:space_inc] if not fresh_start: - elast, conv_last = _sort_elast(elast, conv, vlast, v_sub[:,:nroots], log) + elast, conv_last = _sort_elast_gpu(elast, conv, vlast, v_sub[:,:nroots], log) vlast = v_sub[:,:nroots] if elast is None: @@ -684,8 +693,8 @@ def real_eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=Non R_y += U1_holder[:,:m1].dot(y) R_y += Y_full * w - r_norms = np.linalg.norm(R_x, axis=0) ** 2 - r_norms += np.linalg.norm(R_y, axis=0) ** 2 + r_norms = cp.linalg.norm(R_x, axis=0) ** 2 + r_norms += cp.linalg.norm(R_y, axis=0) ** 2 r_norms = r_norms ** .5 if x0sym is not None: @@ -701,14 +710,14 @@ def real_eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=Non else: log.debug1('root %d |r|= %4.3g e= %s max|de|= %4.3g', k, r_norms[k], ek, de[k]) - ide = np.argmax(abs(de)) + ide = cp.argmax(abs(de)) if all(conv): log.debug('converged %d %d |r|= %4.3g e= %s max|de|= %4.3g', icyc, len(conv), max_r_norm, e, de[ide]) break r_index = r_norms > tol_residual - XY_new = precond(np.vstack([R_x[:,r_index], R_y[:,r_index]]).T, w[r_index]) + XY_new = precond(cp.vstack([R_x[:,r_index], R_y[:,r_index]]).T, w[r_index]) X_new = XY_new[:,:A_size].T Y_new = XY_new[:,A_size:].T if x0sym is None: @@ -720,17 +729,17 @@ def real_eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=Non V = [] W = [] for ir in set(xt_ir): - idx = np.nonzero(xt_ir == ir)[0] + idx = cp.nonzero(xt_ir == ir)[0] _V, _W = VW_Gram_Schmidt_fill_holder( V_holder[:,:m1], W_holder[:,:m1], X_new[:,idx], Y_new[:,idx]) V.append(_V) W.append(_W) xt_orth_ir.append([ir] * len(_V)) if len(V) > 0: - V = np.vstack(V) - W = np.vstack(W) - xt_ir = np.hstack(xt_orth_ir) - xs_ir = np.hstack([xs_ir, xt_ir]) + V = cp.vstack(V) + W = cp.vstack(W) + xt_ir = cp.hstack(xt_orth_ir) + xs_ir = cp.hstack([xs_ir, xt_ir]) if len(V) == 0: log.debug(f'Linear dependency in trial subspace. |r| for each state {r_norms}') @@ -740,8 +749,8 @@ def real_eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=Non len(V), r_norms.size - len(V)) X_new = Y_new = R_x = R_y = None - xy_norms = np.linalg.norm(V, axis=0) ** 2 - xy_norms += np.linalg.norm(W, axis=0) ** 2 + xy_norms = cp.linalg.norm(V, axis=0) ** 2 + xy_norms += cp.linalg.norm(W, axis=0) ** 2 norm_min = (xy_norms ** .5).min() log.debug('real_lr_eig %d %d |r|= %4.3g e= %s max|de|= %4.3g lindep= %4.3g', icyc, m1, max_r_norm, e, de[ide], norm_min) @@ -752,7 +761,7 @@ def real_eig(aop, x0, precond, tol_residual=1e-5, nroots=1, x0sym=None, pick=Non if len(x0[0]) < min(A_size, nroots): log.warn(f'Not enough eigenvectors (len(x0)={len(x0[0])}, nroots={nroots})') - return conv[:nroots], e[:nroots], np.hstack(x0) + return conv[:nroots], e[:nroots].get(), cp.hstack(x0).get() def _gen_x0(v, xs): out = _outprod_to_subspace(v[::2], xs) @@ -788,13 +797,14 @@ def _qr(xs, lindep=1e-14): for j in range(nv): prod = xs[j].conj().dot(xi) xi -= xs[j] * prod - norm = np.linalg.norm(xi) + norm = cp.linalg.norm(xi) if norm**2 > lindep: xs[nv] = xi/norm nv += 1 idx.append(i) return xs[:nv], idx + def _symmetric_orth(xt, lindep=1e-6): xt = np.asarray(xt) if xt.dtype == np.float64: @@ -906,31 +916,31 @@ def TDDFT_subspace_eigen_solver(a, b, sigma, pi, nroots): ''' [ a b ] x - [ σ π] x Ω = 0 ''' ''' [ b a ] y [-π -σ] y = 0 ''' - d = abs(np.diag(sigma)) + d = abs(cp.diag(sigma)) d_mh = d**(-0.5) - s_m_p = np.einsum('i,ij,j->ij', d_mh, sigma - pi, d_mh) + s_m_p = cp.einsum('i,ij,j->ij', d_mh, sigma - pi, d_mh) '''LU = d^−1/2 (σ − π) d^−1/2''' ''' A = LU ''' - L, U = scipy.linalg.lu(s_m_p, permute_l=True) - L_inv = np.linalg.inv(L) - U_inv = np.linalg.inv(U) + L, U = cupyx.scipy.linalg.lu(s_m_p, permute_l=True) + L_inv = cp.linalg.inv(L) + U_inv = cp.linalg.inv(U) '''U^-T d^−1/2 (a−b) d^-1/2 U^-1 = GG^T ''' - d_amb_d = np.einsum('i,ij,j->ij', d_mh, a-b, d_mh) - GGT = np.linalg.multi_dot([U_inv.T, d_amb_d, U_inv]) + d_amb_d = cp.einsum('i,ij,j->ij', d_mh, a-b, d_mh) + GGT = cp.dot(U_inv.T, cp.dot(d_amb_d, U_inv)) - G = scipy.linalg.cholesky(GGT, lower=True) - G_inv = np.linalg.inv(G) + G = cp.linalg.cholesky(GGT) + G_inv = cp.linalg.inv(G) ''' M = G^T L^−1 d^−1/2 (a+b) d^−1/2 L^−T G ''' - d_apb_d = np.einsum('i,ij,j->ij', d_mh, a+b, d_mh) - M = np.linalg.multi_dot([G.T, L_inv, d_apb_d, L_inv.T, G]) + d_apb_d = cp.einsum('i,ij,j->ij', d_mh, a+b, d_mh) + M = cp.dot(G.T, cp.dot(L_inv, cp.dot(d_apb_d, cp.dot(L_inv.T, G)))) - omega2, Z = np.linalg.eigh(M) - if np.any(omega2 <= 0): - idx = np.nonzero(omega2 > 0)[0] + omega2, Z = cp.linalg.eigh(M) + if cp.any(omega2 <= 0): + idx = cp.nonzero(omega2 > 0)[0] omega2 = omega2[idx[:nroots]] Z = Z[:,idx[:nroots]] else: @@ -941,13 +951,14 @@ def TDDFT_subspace_eigen_solver(a, b, sigma, pi, nroots): ''' It requires Z^T Z = 1/Ω ''' ''' x+y = d^−1/2 L^−T GZ Ω^-0.5 ''' ''' x−y = d^−1/2 U^−1 G^−T Z Ω^0.5 ''' - x_p_y = np.einsum('i,ik,k->ik', d_mh, L_inv.T.dot(G.dot(Z)), omega**-0.5) - x_m_y = np.einsum('i,ik,k->ik', d_mh, U_inv.dot(G_inv.T.dot(Z)), omega**0.5) + x_p_y = cp.einsum('i,ik,k->ik', d_mh, L_inv.T.dot(G.dot(Z)), omega**-0.5) + x_m_y = cp.einsum('i,ik,k->ik', d_mh, U_inv.dot(G_inv.T.dot(Z)), omega**0.5) x = (x_p_y + x_m_y)/2 y = x_p_y - x return omega, x, y + def VW_Gram_Schmidt_fill_holder(V_holder, W_holder, X_new, Y_new, lindep=1e-6): ''' QR orthogonalization for (X_new, Y_new) basis vectors, then apply symmetric @@ -968,37 +979,40 @@ def VW_Gram_Schmidt_fill_holder(V_holder, W_holder, X_new, Y_new, lindep=1e-6): # s21 is symmetric s21 = X_new.T.dot(Y_new) s21 += Y_new.T.dot(X_new) - e, c = np.linalg.eigh(s11) + e, c = cp.linalg.eigh(s11) mask = e > lindep**2 e = e[mask] if e.size == 0: - return (np.zeros([0, x0_size], dtype=X_new.dtype), - np.zeros([0, x0_size], dtype=Y_new.dtype)) + return (cp.zeros([0, x0_size], dtype=X_new.dtype), + cp.zeros([0, x0_size], dtype=Y_new.dtype)) c = c[:,mask] * e**-.5 csc = c.T.dot(s21).dot(c) n = csc.shape[0] for i in range(n): - w, u = np.linalg.eigh(csc[i:,i:]) + w, u = cp.linalg.eigh(csc[i:,i:]) mask = 1 - abs(w) > lindep - if np.any(mask): + if cp.any(mask): c = c[:,i:] break else: - return (np.zeros([0, x0_size], dtype=X_new.dtype), - np.zeros([0, x0_size], dtype=Y_new.dtype)) + return (cp.zeros([0, x0_size], dtype=X_new.dtype), + cp.zeros([0, x0_size], dtype=Y_new.dtype)) w = w[mask] u = u[:,mask] c_orth = c.dot(u) if e[0] < 1e-6 or 1-abs(w[0]) < 1e-3: # Rerun the orthogonalization to reduce numerical errors - e, c = np.linalg.eigh(c_orth.T.dot(s11).dot(c_orth)) + e, c = cp.linalg.eigh(c_orth.T.dot(s11).dot(c_orth)) c *= e**-.5 c_orth = c_orth.dot(c) csc = c_orth.T.dot(s21).dot(c_orth) - w, u = np.linalg.eigh(csc) + w, u = cp.linalg.eigh(csc) c_orth = c_orth.dot(u) + mask = 1 - abs(w) > lindep + w = w[mask] + c_orth = c_orth[:,mask] # Symmetric diagonalize # [1 w] => c = [a b] @@ -1016,3 +1030,28 @@ def VW_Gram_Schmidt_fill_holder(V_holder, W_holder, X_new, Y_new, lindep=1e-6): y_orth = Y_new.dot(c_orth * a) y_orth += X_new.dot(c_orth * b) return x_orth.T, y_orth.T + + +def _sort_elast_gpu(elast, conv_last, vlast, v, log): + ''' + Eigenstates may be flipped during the Davidson iterations. Reorder the + eigenvalues of last iteration to make them comparable to the eigenvalues + of the current iterations. + ''' + head, nroots = vlast.shape + ovlp = abs(cp.dot(v[:head].conj().T, vlast)) + mapping = cp.argmax(ovlp, axis=1) + found = cp.any(ovlp > .5, axis=1) + + if log.verbose >= logger.DEBUG: + ordering_diff = (mapping != cp.arange(len(mapping))) + if any(ordering_diff & found): + log.debug('Old state -> New state') + for i in cp.where(ordering_diff)[0]: + log.debug(' %3d -> %3d ', mapping[i], i) + + conv = conv_last[mapping] + e = elast[mapping] + conv[~found] = False + e[~found] = 0. + return e, conv \ No newline at end of file diff --git a/gpu4pyscf/tdscf/rhf.py b/gpu4pyscf/tdscf/rhf.py index 55b7618c..a7b5c4f8 100644 --- a/gpu4pyscf/tdscf/rhf.py +++ b/gpu4pyscf/tdscf/rhf.py @@ -15,11 +15,9 @@ import numpy as np import cupy as cp -import scipy.linalg -from pyscf import gto from pyscf import lib from pyscf.tdscf import rhf as tdhf_cpu -from gpu4pyscf.tdscf._lr_eig import eigh as lr_eigh, eig as lr_eig, real_eig +from gpu4pyscf.tdscf._lr_eig import eigh as lr_eigh, real_eig from gpu4pyscf import scf from gpu4pyscf.lib.cupy_helper import contract, tag_array from gpu4pyscf.lib import utils @@ -53,7 +51,7 @@ def gen_tda_operation(mf, fock_ao=None, singlet=True, wfnsym=None): orbo2 = orbo * 2. # *2 for double occupancy e_ia = hdiag = mo_energy[viridx] - mo_energy[occidx,None] - hdiag = hdiag.ravel().get() + hdiag = hdiag.ravel() vresp = mf.gen_response(singlet=singlet, hermi=0) nocc, nvir = e_ia.shape @@ -66,7 +64,7 @@ def vind(zs): v1mo = contract('xpq,qo->xpo', v1ao, orbo) v1mo = contract('xpo,pv->xov', v1mo, orbv.conj()) v1mo += zs * e_ia - return v1mo.reshape(v1mo.shape[0],-1).get() + return v1mo.reshape(v1mo.shape[0],-1) return vind, hdiag @@ -100,11 +98,15 @@ class TDBase(lib.StreamObject): get_ab = NotImplemented def get_precond(self, hdiag): + threshold_t=1.0e-4 def precond(x, e, *args): - if isinstance(e, np.ndarray): - e = e[0] + n_states = x.shape[0] + diagd = cp.repeat(hdiag.reshape(1,-1), n_states, axis=0) + e = e.reshape(-1,1) diagd = hdiag - (e-self.level_shift) - diagd[abs(diagd)<1e-8] = 1e-8 + diagd = cp.where(abs(diagd) < threshold_t, cp.sign(diagd)*threshold_t, diagd) + a_size = x.shape[1]//2 + diagd[:,a_size:] = diagd[:,a_size:]*(-1) return x/diagd return precond @@ -170,6 +172,17 @@ def _contract_multipole(tdobj, ints, hermi=True, xy=None): class TDA(TDBase): __doc__ = tdhf_cpu.TDA.__doc__ + def get_precond(self, hdiag): + threshold_t=1.0e-4 + def precond(x, e, *args): + n_states = x.shape[0] + diagd = cp.repeat(hdiag.reshape(1,-1), n_states, axis=0) + e = e.reshape(-1,1) + diagd = hdiag - (e-self.level_shift) + diagd = cp.where(abs(diagd) < threshold_t, cp.sign(diagd)*threshold_t, diagd) + return x/diagd + return precond + def gen_vind(self, mf=None): '''Generate function to compute Ax''' if mf is None: @@ -228,7 +241,7 @@ def kernel(self, x0=None, nstates=None): precond = self.get_precond(hdiag) def pickeig(w, v, nroots, envs): - idx = np.where(w > self.positive_eig_threshold)[0] + idx = cp.where(w > self.positive_eig_threshold)[0] return w[idx], v[:,idx], idx x0sym = None @@ -291,10 +304,10 @@ def vind(zs): v1_top += xs * e_ia # AX v1_bot += ys * e_ia # (A*)Y return cp.hstack((v1_top.reshape(nz,nocc*nvir), - -v1_bot.reshape(nz,nocc*nvir))).get() + -v1_bot.reshape(nz,nocc*nvir))) hdiag = cp.hstack([hdiag.ravel(), -hdiag.ravel()]) - return vind, hdiag.get() + return vind, hdiag class TDHF(TDBase): diff --git a/gpu4pyscf/tdscf/rks.py b/gpu4pyscf/tdscf/rks.py index 9961a21c..948c7d50 100644 --- a/gpu4pyscf/tdscf/rks.py +++ b/gpu4pyscf/tdscf/rks.py @@ -15,7 +15,7 @@ import numpy as np import cupy as cp from pyscf import lib -from pyscf.tdscf._lr_eig import eigh as lr_eigh +from gpu4pyscf.tdscf._lr_eig import eigh as lr_eigh from gpu4pyscf.dft.rks import KohnShamDFT from gpu4pyscf.lib.cupy_helper import contract, tag_array, transpose_sum from gpu4pyscf.lib import logger @@ -54,7 +54,6 @@ def gen_vind(self, mf=None): d_ia = e_ia ** .5 ed_ia = e_ia * d_ia hdiag = e_ia.ravel() ** 2 - hdiag = hdiag.get() vresp = mf.gen_response(singlet=singlet, hermi=1) nocc, nvir = e_ia.shape @@ -71,7 +70,7 @@ def vind(zs): v1mo = contract('xpo,pv->xov', v1mo, orbv) v1mo += zs * ed_ia v1mo *= d_ia - return v1mo.reshape(v1mo.shape[0],-1).get() + return v1mo.reshape(v1mo.shape[0],-1) return vind, hdiag @@ -95,7 +94,7 @@ def kernel(self, x0=None, nstates=None): precond = self.get_precond(hdiag) def pickeig(w, v, nroots, envs): - idx = np.where(w > self.positive_eig_threshold)[0] + idx = cp.where(w > self.positive_eig_threshold)[0] return w[idx], v[:,idx], idx x0sym = None diff --git a/gpu4pyscf/tdscf/tests/test_tdrhf.py b/gpu4pyscf/tdscf/tests/test_tdrhf.py index 4e6761ca..f1a0774a 100644 --- a/gpu4pyscf/tdscf/tests/test_tdrhf.py +++ b/gpu4pyscf/tdscf/tests/test_tdrhf.py @@ -125,12 +125,12 @@ def test_tda_vind(self): nvir = nmo - nocc zs = np.random.rand(3,nocc,nvir) ref = mf.to_cpu().TDA().set(singlet=False).gen_vind()[0](zs) - dat = mf.TDA().set(singlet=False).gen_vind()[0](cp.asarray(zs)) + dat = mf.TDA().set(singlet=False).gen_vind()[0](cp.asarray(zs)).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) df_mf = self.df_mf ref = df_mf.to_cpu().TDA().set(singlet=True).gen_vind()[0](zs) - dat = df_mf.TDA().set(singlet=True).gen_vind()[0](cp.asarray(zs)) + dat = df_mf.TDA().set(singlet=True).gen_vind()[0](cp.asarray(zs)).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) def test_tdhf_vind(self): @@ -140,12 +140,12 @@ def test_tdhf_vind(self): nvir = nmo - nocc zs = np.random.rand(3,2,nocc,nvir) ref = mf.to_cpu().TDHF().set(singlet=True).gen_vind()[0](zs) - dat = mf.TDHF().set(singlet=True).gen_vind()[0](zs) + dat = mf.TDHF().set(singlet=True).gen_vind()[0](zs).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) df_mf = self.df_mf ref = df_mf.to_cpu().TDHF().set(singlet=False).gen_vind()[0](zs) - dat = df_mf.TDHF().set(singlet=False).gen_vind()[0](zs) + dat = df_mf.TDHF().set(singlet=False).gen_vind()[0](zs).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) if __name__ == "__main__": diff --git a/gpu4pyscf/tdscf/tests/test_tdrks.py b/gpu4pyscf/tdscf/tests/test_tdrks.py index cebae87c..233b6463 100644 --- a/gpu4pyscf/tdscf/tests/test_tdrks.py +++ b/gpu4pyscf/tdscf/tests/test_tdrks.py @@ -251,7 +251,7 @@ def test_tda_vind(self): nvir = nmo - nocc zs = np.random.rand(3,nocc,nvir) ref = mf.to_cpu().TDA().set(singlet=False).gen_vind()[0](zs) - dat = mf.TDA().set(singlet=False).gen_vind()[0](cp.asarray(zs)) + dat = mf.TDA().set(singlet=False).gen_vind()[0](cp.asarray(zs)).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) def test_tddft_vind(self): @@ -261,7 +261,7 @@ def test_tddft_vind(self): nvir = nmo - nocc zs = np.random.rand(3,2,nocc,nvir) ref = mf.to_cpu().TDDFT().set(singlet=True).gen_vind()[0](zs) - dat = mf.TDDFT().set(singlet=True).gen_vind()[0](zs) + dat = mf.TDDFT().set(singlet=True).gen_vind()[0](zs).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) def test_casida_tddft_vind(self): @@ -271,7 +271,7 @@ def test_casida_tddft_vind(self): nvir = nmo - nocc zs = np.random.rand(3,nocc,nvir) ref = mf.to_cpu().CasidaTDDFT().gen_vind()[0](zs) - dat = mf.CasidaTDDFT().gen_vind()[0](cp.asarray(zs)) + dat = mf.CasidaTDDFT().gen_vind()[0](cp.asarray(zs)).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) if __name__ == "__main__": diff --git a/gpu4pyscf/tdscf/tests/test_tduhf.py b/gpu4pyscf/tdscf/tests/test_tduhf.py index 884249e4..0acd8578 100644 --- a/gpu4pyscf/tdscf/tests/test_tduhf.py +++ b/gpu4pyscf/tdscf/tests/test_tduhf.py @@ -100,7 +100,7 @@ def test_tda_vind(self): nvirb = nmo - noccb zs = np.random.rand(3,nocca*nvira+noccb*nvirb) ref = mf.to_cpu().TDA().set().gen_vind()[0](zs) - dat = mf.TDA().set().gen_vind()[0](cp.asarray(zs)) + dat = mf.TDA().set().gen_vind()[0](cp.asarray(zs)).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) def test_tdhf_vind(self): @@ -111,7 +111,7 @@ def test_tdhf_vind(self): nvirb = nmo - noccb zs = np.random.rand(3,2,nocca*nvira+noccb*nvirb) ref = mf.to_cpu().TDHF().set().gen_vind()[0](zs) - dat = mf.TDHF().set().gen_vind()[0](zs) + dat = mf.TDHF().set().gen_vind()[0](zs).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) if __name__ == "__main__": diff --git a/gpu4pyscf/tdscf/tests/test_tduks.py b/gpu4pyscf/tdscf/tests/test_tduks.py index ad1e3f45..34c214b8 100644 --- a/gpu4pyscf/tdscf/tests/test_tduks.py +++ b/gpu4pyscf/tdscf/tests/test_tduks.py @@ -187,7 +187,7 @@ def test_tda_vind(self): nvirb = nmo - noccb zs = np.random.rand(3,nocca*nvira+noccb*nvirb) ref = mf.to_cpu().TDA().gen_vind()[0](zs) - dat = mf.TDA().gen_vind()[0](cp.asarray(zs)) + dat = mf.TDA().gen_vind()[0](cp.asarray(zs)).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) def test_tddft_vind(self): @@ -198,7 +198,7 @@ def test_tddft_vind(self): nvirb = nmo - noccb zs = np.random.rand(3,2,nocca*nvira+noccb*nvirb) ref = mf.to_cpu().TDDFT().gen_vind()[0](zs) - dat = mf.TDDFT().gen_vind()[0](cp.asarray(zs)) + dat = mf.TDDFT().gen_vind()[0](cp.asarray(zs)).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) def test_casida_tddft_vind(self): @@ -209,7 +209,7 @@ def test_casida_tddft_vind(self): nvirb = nmo - noccb zs = np.random.rand(3,nocca*nvira+noccb*nvirb) ref = mf.to_cpu().CasidaTDDFT().gen_vind()[0](zs) - dat = mf.CasidaTDDFT().gen_vind()[0](cp.asarray(zs)) + dat = mf.CasidaTDDFT().gen_vind()[0](cp.asarray(zs)).get() self.assertAlmostEqual(abs(ref - dat).max(), 0, 9) if __name__ == "__main__": diff --git a/gpu4pyscf/tdscf/uhf.py b/gpu4pyscf/tdscf/uhf.py index d2b4224d..a0182390 100644 --- a/gpu4pyscf/tdscf/uhf.py +++ b/gpu4pyscf/tdscf/uhf.py @@ -63,7 +63,7 @@ def gen_tda_operation(mf, fock_ao=None, wfnsym=None): e_ia_a = mo_energy[0][viridxa] - mo_energy[0][occidxa,None] e_ia_b = mo_energy[1][viridxb] - mo_energy[1][occidxb,None] e_ia = cp.hstack((e_ia_a.reshape(-1), e_ia_b.reshape(-1))) - hdiag = e_ia.get() + hdiag = e_ia nocca, nvira = e_ia_a.shape noccb, nvirb = e_ia_b.shape @@ -88,7 +88,7 @@ def vind(zs): v1a += za * e_ia_a v1b += zb * e_ia_b hx = cp.hstack((v1a.reshape(nz,-1), v1b.reshape(nz,-1))) - return hx.get() + return hx return vind, hdiag @@ -185,7 +185,7 @@ def kernel(self, x0=None, nstates=None): precond = self.get_precond(hdiag) def pickeig(w, v, nroots, envs): - idx = np.where(w > self.positive_eig_threshold)[0] + idx = cp.where(w > self.positive_eig_threshold)[0] return w[idx], v[:,idx], idx x0sym = None @@ -258,7 +258,7 @@ def gen_vind(self): orbva = mo_coeff[0][:,viridxa] orbov = (orbob, orbva) e_ia = mo_energy[0][viridxa] - mo_energy[1][occidxb,None] - hdiag = e_ia.ravel().get() + hdiag = e_ia.ravel() elif extype == 1: occidxa = mo_occ[0] > 0 @@ -267,7 +267,7 @@ def gen_vind(self): orbvb = mo_coeff[1][:,viridxb] orbov = (orboa, orbvb) e_ia = mo_energy[1][viridxb] - mo_energy[0][occidxa,None] - hdiag = e_ia.ravel().get() + hdiag = e_ia.ravel() vresp = gen_uhf_response_sf( mf, hermi=0, collinear=self.collinear, @@ -283,7 +283,7 @@ def vind(zs): v1mo = contract('xpq,qo->xpo', v1ao, orbo) v1mo = contract('xpo,pv->xov', v1mo, orbv.conj()) v1mo += zs * e_ia - return v1mo.reshape(len(v1mo), -1).get() + return v1mo.reshape(len(v1mo), -1) return vind, hdiag @@ -461,10 +461,10 @@ def vind(zs): v1_bot[:,:nocca*nvira] += v1a_bot.reshape(nz,-1) v1_top[:,nocca*nvira:] += v1b_top.reshape(nz,-1) v1_bot[:,nocca*nvira:] += v1b_bot.reshape(nz,-1) - return cp.hstack([v1_top, -v1_bot]).get() + return cp.hstack([v1_top, -v1_bot]) hdiag = cp.hstack([hdiag.ravel(), -hdiag.ravel()]) - return vind, hdiag.get() + return vind, hdiag class TDHF(TDBase): @@ -578,9 +578,9 @@ def gen_vind(self): extype = self.extype if extype == 0: - hdiag = cp.hstack([e_ia_b2a.ravel(), -e_ia_a2b.ravel()]).get() + hdiag = cp.hstack([e_ia_b2a.ravel(), -e_ia_a2b.ravel()]) else: - hdiag = cp.hstack([e_ia_a2b.ravel(), -e_ia_b2a.ravel()]).get() + hdiag = cp.hstack([e_ia_a2b.ravel(), -e_ia_b2a.ravel()]) vresp = gen_uhf_response_sf( mf, hermi=0, collinear=self.collinear, @@ -681,7 +681,7 @@ def vind(zs): v1_top += zs_a2b * e_ia_a2b v1_bot += zs_b2a * e_ia_b2a hx = cp.hstack([v1_top.reshape(nz,-1), -v1_bot.reshape(nz,-1)]) - return hx.get() + return hx return vind, hdiag diff --git a/gpu4pyscf/tdscf/uks.py b/gpu4pyscf/tdscf/uks.py index 44672929..c2c48d22 100644 --- a/gpu4pyscf/tdscf/uks.py +++ b/gpu4pyscf/tdscf/uks.py @@ -16,7 +16,7 @@ import cupy as cp from pyscf import symm from pyscf import lib -from pyscf.tdscf._lr_eig import eigh as lr_eigh +from gpu4pyscf.tdscf._lr_eig import eigh as lr_eigh from gpu4pyscf.dft.rks import KohnShamDFT from gpu4pyscf.lib.cupy_helper import contract, tag_array, transpose_sum from gpu4pyscf.lib import logger @@ -69,7 +69,7 @@ def gen_vind(self, mf=None): d_ia = e_ia**.5 ed_ia = e_ia * d_ia hdiag = e_ia ** 2 - hdiag = hdiag.get() + hdiag = hdiag vresp = mf.gen_response(mo_coeff, mo_occ, hermi=1) nocca, nvira = e_ia_a.shape noccb, nvirb = e_ia_b.shape @@ -96,7 +96,7 @@ def vind(zs): hx = cp.hstack((v1a.reshape(nz,-1), v1b.reshape(nz,-1))) hx += ed_ia * zs hx *= d_ia - return hx.get() + return hx return vind, hdiag @@ -120,7 +120,7 @@ def kernel(self, x0=None, nstates=None): precond = self.get_precond(hdiag) def pickeig(w, v, nroots, envs): - idx = np.where(w > self.positive_eig_threshold)[0] + idx = cp.where(w > self.positive_eig_threshold)[0] return w[idx], v[:,idx], idx x0sym = None