Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DFT with small batch grids #193

Draft
wants to merge 24 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions gpu4pyscf/__config__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,31 @@
# such as A100-80G
if props['totalGlobalMem'] >= 64 * GB:
min_ao_blksize = 128
min_grid_blksize = 128*128
min_grid_blksize = 256 #128*128
ao_aligned = 32
grid_aligned = 256
mem_fraction = 0.9
number_of_threads = 2048 * 108
# such as V100-32G
elif props['totalGlobalMem'] >= 32 * GB:
min_ao_blksize = 128
min_grid_blksize = 128*128
min_grid_blksize = 256 #128*128
ao_aligned = 32
grid_aligned = 256
mem_fraction = 0.9
number_of_threads = 1024 * 80
# such as A30-24GB
elif props['totalGlobalMem'] >= 16 * GB:
min_ao_blksize = 128
min_grid_blksize = 128*128
min_grid_blksize = 256 #128*128
ao_aligned = 32
grid_aligned = 256
mem_fraction = 0.9
number_of_threads = 1024 * 80
# other gaming cards
else:
min_ao_blksize = 64
min_grid_blksize = 64*64
min_grid_blksize = 256 #64*64
ao_aligned = 32
grid_aligned = 128
mem_fraction = 0.9
Expand Down
80 changes: 75 additions & 5 deletions gpu4pyscf/dft/gen_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@
import cupy
from pyscf import lib
from pyscf import gto
from pyscf.gto.eval_gto import BLKSIZE, NBINS, CUTOFF, make_screen_index
from pyscf.gto.eval_gto import BLKSIZE, NBINS, CUTOFF
from pyscf import __config__
from cupyx.scipy.spatial.distance import cdist
from gpu4pyscf.lib import logger
from gpu4pyscf.gto.eval_gto import make_screen_index
from gpu4pyscf.dft import radi
from gpu4pyscf.lib.cupy_helper import load_library
from gpu4pyscf import __config__ as __gpu4pyscf_config__
Expand All @@ -44,8 +45,12 @@

from pyscf.dft.gen_grid import GROUP_BOUNDARY_PENALTY, NELEC_ERROR_TOL, LEBEDEV_ORDER, LEBEDEV_NGRID

GROUP_BOX_SIZE = 3.0
# 'ao_aligned' is defined in gpu4pyscf's own config module, like the grid
# settings read just below, so look it up there first.  The pyscf-level config
# is kept as a fallback so an override placed there is still honoured.
AO_ALIGNMENT = getattr(__gpu4pyscf_config__, 'ao_aligned',
                       getattr(__config__, 'ao_aligned', 16))
GROUP_BOX_SIZE = 1.2
ALIGNMENT_UNIT = getattr(__gpu4pyscf_config__, 'grid_aligned', 128)
MIN_BLK_SIZE = getattr(__gpu4pyscf_config__, 'min_grid_blksize', 64*64)
GRID_BLKSIZE = MIN_BLK_SIZE

# SG0
# S. Chien and P. Gill, J. Comput. Chem. 27 (2006) 730-739.

Expand Down Expand Up @@ -419,7 +424,8 @@ def atomic_group_grids(mol, coords):
ctypes.c_int(ngrids)
)
if err != 0:
raise RuntimeError('CUDA Error')
raise RuntimeError('CUDA Error in GDFTgroup_grids kernel')

idx = group_ids.argsort()
return idx

Expand Down Expand Up @@ -464,6 +470,35 @@ def _load_conf(mod, name, default):
else:
return var

def gen_sparse_cache(mol, coords, blksize):
    '''
    Determine sparse AO indices for every block of ``blksize`` grid points.

    Args:
        mol: molecule object; supplies ``ao_loc_nr()``, ``nbas`` and ``nao``.
        coords: grid coordinates; only ``coords.shape[0]`` (the number of
            grid points) is read here, the array itself is forwarded to
            ``make_screen_index``.
        blksize: number of grid points per block.

    Returns:
        tuple ``(ao_indices, s_index, ao_loc_non0)``:
          * ``ao_indices``  - host int32 array of shape (nblocks, nao),
            filled by ``GDFTmake_sparse_cache``.
          * ``s_index``     - screening index as returned by
            ``make_screen_index`` (it exposes ``.get()``, so presumably a
            device array -- confirm against gpu4pyscf.gto.eval_gto).
          * ``ao_loc_non0`` - host int32 array of shape (nblocks, nbas+1),
            filled by ``GDFTmake_sparse_cache``.
    '''
    # Follow the verbosity of ``mol`` instead of forcing debug-level output.
    log = logger.new_logger(mol, mol.verbose)
    ao_loc = mol.ao_loc_nr()
    ngrids = coords.shape[0]

    t0 = log.init_timer()
    s_index = make_screen_index(mol, coords, blksize=blksize)
    t0 = log.timer_debug1('s_index', *t0)

    # The cache builder below runs on the CPU, so it needs a host copy.
    s_index_cpu = s_index.get()

    # Ceiling division: a trailing partial block still gets its own row.
    nblocks = (ngrids + blksize - 1) // blksize
    nbas = mol.nbas
    nao = mol.nao
    ao_indices = numpy.zeros([nblocks, nao], dtype=numpy.int32)
    ao_loc_non0 = numpy.zeros([nblocks, nbas + 1], dtype=numpy.int32)

    # Running on CPU
    libgdft.GDFTmake_sparse_cache(
        s_index_cpu.ctypes.data_as(ctypes.c_void_p),
        ao_loc.ctypes.data_as(ctypes.c_void_p),
        ctypes.c_int(nblocks),
        ao_loc_non0.ctypes.data_as(ctypes.c_void_p),
        ctypes.c_int(nbas),
        ao_indices.ctypes.data_as(ctypes.c_void_p),
        ctypes.c_int(nao))
    log.timer_debug1('sparse ao kernel', *t0)
    return ao_indices, s_index, ao_loc_non0

from pyscf.dft import gen_grid
from gpu4pyscf.lib import utils
class Grids(lib.StreamObject):
Expand All @@ -483,8 +518,9 @@ class Grids(lib.StreamObject):
alignment = ALIGNMENT_UNIT
cutoff = CUTOFF
# Build a new set rather than aliasing gen_grid.Grids._keys: updating the
# alias in place would also add 'sparse_cache' to the upstream class's keys.
_keys = gen_grid.Grids._keys.union({'sparse_cache'})

# Reuse the upstream constructor unchanged.
__init__ = gen_grid.Grids.__init__

def __setattr__(self, key, val):
if key in ('atom_grid', 'atomic_radii', 'radii_adjust', 'radi_method',
Expand Down Expand Up @@ -523,6 +559,7 @@ def build(self, mol=None, with_non0tab=False, sort_grids=True, **kwargs):
self.screen_index = self.non0tab
else:
self.screen_index = self.non0tab = None

logger.info(self, 'tot grids = %d', len(self.weights))
return self

Expand All @@ -538,6 +575,7 @@ def reset(self, mol=None):
self.weights = None
self.non0tab = None
self.screen_index = None
self.sparse_cache = {}
return self

gen_atomic_grids = lib.module_method(
Expand All @@ -556,6 +594,38 @@ def get_partition(self, mol, atom_grids_tab=None,

make_mask = lib.module_method(make_mask, absences=['cutoff'])

def build_sparsity(self, sorted_mol):
    ''' Build sparsity data for sparse AO evaluation
    Sort grids for batching grids

    The grid is viewed as consecutive blocks of ``GRID_BLKSIZE`` points.
    Blocks are reordered by ascending number of nonzero AOs (the last
    column of ``ao_loc_non0``), and ``self.coords`` / ``self.weights`` are
    permuted block-wise to match.  If the number of grid points is not a
    multiple of the block size, the trailing partial block is left in the
    last position so that every other block stays aligned to a multiple of
    the block size; in that case the last entry of ``nao_non0`` may be out
    of sorted order.

    The result ``(ao_indices, s_index, ao_loc_non0, nao_non0)`` is stored in
    ``self.sparse_cache[blksize, ngrids]``; nothing is returned.
    '''
    blksize = GRID_BLKSIZE
    ngrids = self.coords.shape[0]
    ao_indices, s_index, ao_loc_non0 = gen_sparse_cache(
        sorted_mol, self.coords, blksize)

    # Host-side count of nonzero AOs per block.
    nao_non0 = ao_loc_non0[:, -1]
    nblocks = nao_non0.shape[0]
    nfull = ngrids // blksize

    # Sort only the complete blocks; a partial block (if any) stays last.
    order = numpy.argsort(nao_non0[:nfull])
    if nfull < nblocks:
        order = numpy.append(order, nfull)
    order_gpu = cupy.asarray(order)

    # Permute the per-block metadata on the host, then upload once.
    nao_non0 = nao_non0[order]
    ao_indices = cupy.asarray(ao_indices[order])
    ao_loc_non0 = cupy.asarray(ao_loc_non0[order])
    s_index = s_index[order_gpu]

    # Expand the block permutation into a per-grid-point gather index.
    idx_grids = order_gpu[:, None] * blksize + cupy.arange(blksize)
    idx_grids = idx_grids.ravel(order='C')
    # The partial block sits at the end, so any index >= ngrids is in the
    # tail; drop it (CuPy would otherwise wrap out-of-range indices).
    idx_grids = idx_grids[:ngrids]

    self.coords = self.coords[idx_grids]
    self.weights = self.weights[idx_grids]

    self.sparse_cache[blksize, ngrids] = (
        ao_indices, s_index, ao_loc_non0, nao_non0)

def prune_by_density_(self, rho, threshold=0):
'''Prune grids if the electron density on the grid is small'''
if threshold == 0:
Expand Down
Loading
Loading