Fix bugs when init_frz_model is used with tebd. (deepmodeling#1891)
Fix bugs when init_frz_model is used with tebd.
Add init_variables for attention variables.
Fix precision of self.t_bias_atom_e in ener.py.
Fix typos in the default values in argcheck for se_atten.
Improve the name scope in se_atten.py.
Delete some redundant variables in se_atten.py.
Move tf.constant from se_a.__init__() into build.
Add UTs for init_frz_model.

Co-authored-by: Jinzhe Zeng <[email protected]>
2 people authored and mingzhong15 committed Jan 15, 2023
1 parent d587246 commit 6505ea3
Showing 19 changed files with 908 additions and 50 deletions.
10 changes: 4 additions & 6 deletions deepmd/descriptor/se_a.py
@@ -164,10 +164,7 @@ def __init__ (self,
self.embedding_net_variables = None
self.mixed_prec = None
self.place_holders = {}
nei_type = np.array([])
for ii in range(self.ntypes):
nei_type = np.append(nei_type, ii * np.ones(self.sel_a[ii])) # like a mask
self.nei_type = tf.constant(nei_type, dtype = tf.int32)
self.nei_type = np.repeat(np.arange(self.ntypes), self.sel_a) # like a mask

avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION)
std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION)
@@ -673,8 +670,9 @@ def _concat_type_embedding(
embedding:
environment of each atom represented by embedding.
'''
te_out_dim = type_embedding.get_shape().as_list()[-1]
nei_embed = tf.nn.embedding_lookup(type_embedding,tf.cast(self.nei_type,dtype=tf.int32)) # shape is [self.nnei, 1+te_out_dim]
te_out_dim = type_embedding.get_shape().as_list()[-1]
self.t_nei_type = tf.constant(self.nei_type, dtype=tf.int32)
nei_embed = tf.nn.embedding_lookup(type_embedding,tf.cast(self.t_nei_type,dtype=tf.int32)) # shape is [self.nnei, 1+te_out_dim]
nei_embed = tf.tile(nei_embed,(nframes*natoms[0],1)) # shape is [nframes*natoms[0]*self.nnei, te_out_dim]
nei_embed = tf.reshape(nei_embed,[-1,te_out_dim])
embedding_input = tf.concat([xyz_scatter,nei_embed],1) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim]
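
Note on the se_a.py change: the vectorized np.repeat call builds the same neighbor-type "mask" as the loop it replaces, and the tf.constant is now created lazily in _concat_type_embedding instead of in __init__. A minimal sketch of the equivalence, with made-up values for ntypes and sel_a:

import numpy as np

# Hypothetical sizes: 2 atom types, 3 selected neighbors of type 0 and 2 of type 1.
ntypes = 2
sel_a = [3, 2]

# Old construction (the removed loop): grow the array type by type, float dtype.
nei_type_old = np.array([])
for ii in range(ntypes):
    nei_type_old = np.append(nei_type_old, ii * np.ones(sel_a[ii]))

# New construction: a single vectorized call, integer dtype.
nei_type_new = np.repeat(np.arange(ntypes), sel_a)

# Same values: neighbor slot -> neighbor type, i.e. [0 0 0 1 1].
assert np.array_equal(nei_type_old, nei_type_new)

Deferring self.t_nei_type = tf.constant(...) until _concat_type_embedding presumably keeps __init__ from adding nodes to the TensorFlow graph, which matters when the descriptor object is re-created while initializing from a frozen model.
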
86 changes: 59 additions & 27 deletions deepmd/descriptor/se_atten.py
@@ -15,6 +15,7 @@
from deepmd.utils.type_embed import embed_atom_type
from deepmd.utils.sess import run_sess
from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph, get_tensor_by_name
from deepmd.utils.graph import get_attention_layer_variables_from_graph_def
from deepmd.utils.errors import GraphWithoutTensorError
from .descriptor import Descriptor
from .se_a import DescrptSeA
@@ -117,6 +118,9 @@ def __init__(self,
self.sel_all_r = [0]
avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION)
std_ones = np.ones([self.ntypes, self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION)
self.beta = np.zeros([self.attn_layer, self.filter_neuron[-1]]).astype(GLOBAL_NP_FLOAT_PRECISION)
self.gamma = np.ones([self.attn_layer, self.filter_neuron[-1]]).astype(GLOBAL_NP_FLOAT_PRECISION)
self.attention_layer_variables = None
sub_graph = tf.Graph()
with sub_graph.as_default():
name_pfx = 'd_sea_'
@@ -305,10 +309,6 @@ def build(self,
self.attn_weight = [None for i in range(self.attn_layer)]
self.angular_weight = [None for i in range(self.attn_layer)]
self.attn_weight_final = [None for i in range(self.attn_layer)]
self.G = None
self.qs = [None for i in range(self.attn_layer)]
self.ks = [None for i in range(self.attn_layer)]
self.vs = [None for i in range(self.attn_layer)]

self.descrpt, self.descrpt_deriv, self.rij, self.nlist, self.nei_type_vec, self.nmask \
= op_module.prod_env_mat_a_mix(coord,
@@ -365,8 +365,8 @@ def _pass_filter(self,
inputs_i = inputs
inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt])
type_i = -1
layer, qmat = self._filter(inputs_i, type_i, natoms, name='filter_type_all' + suffix, reuse=reuse,
trainable=trainable, activation_fn=self.filter_activation_fn,
layer, qmat = self._filter(inputs_i, type_i, natoms, name='filter_type_all' + suffix, suffix=suffix,
reuse=reuse, trainable=trainable, activation_fn=self.filter_activation_fn,
type_embedding=type_embedding, atype=atype)
layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()])
qmat = tf.reshape(qmat, [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3])
@@ -508,7 +508,8 @@ def _feedforward(self, input_xyz, d_in, d_mid):
activation_fn=None,
precision=self.filter_precision,
trainable=True,
uniform_seed=self.uniform_seed))
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables))
input_xyz = one_layer(
input_xyz,
d_in,
@@ -518,7 +519,8 @@ def _feedforward(self, input_xyz, d_in, d_mid):
activation_fn=None,
precision=self.filter_precision,
trainable=True,
uniform_seed=self.uniform_seed)
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables)
input_xyz += residual
input_xyz = tf.keras.layers.LayerNormalization()(input_xyz)
return input_xyz
@@ -553,75 +555,75 @@ def _attention_layers(
input_r,
dotr=False,
do_mask=False,
trainable=True
trainable=True,
suffix=''
):
sd_k = tf.sqrt(tf.cast(1., dtype=self.filter_precision))
self.G = tf.reshape(input_xyz, (-1, shape_i[1] // 4, outputs_size[-1]))[0]
for i in range(layer_num):
with tf.variable_scope('attention_layer{}_'.format(i), reuse=tf.AUTO_REUSE):
name = 'attention_layer_{}{}'.format(i, suffix)
with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
# input_xyz_in = tf.nn.l2_normalize(input_xyz, -1)
Q_c = one_layer(
input_xyz,
self.att_n,
name='c_query',
scope=name+'/',
reuse=tf.AUTO_REUSE,
seed=self.seed,
activation_fn=None,
precision=self.filter_precision,
trainable=trainable,
uniform_seed=self.uniform_seed)
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables)
K_c = one_layer(
input_xyz,
self.att_n,
name='c_key',
scope=name+'/',
reuse=tf.AUTO_REUSE,
seed=self.seed,
activation_fn=None,
precision=self.filter_precision,
trainable=trainable,
uniform_seed=self.uniform_seed)
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables)
V_c = one_layer(
input_xyz,
self.att_n,
name='c_value',
scope=name+'/',
reuse=tf.AUTO_REUSE,
seed=self.seed,
activation_fn=None,
precision=self.filter_precision,
trainable=trainable,
uniform_seed=self.uniform_seed)
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables)
# # natom x nei_type_i x out_size
# xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1]))
# natom x nei_type_i x att_n
Q_c = tf.nn.l2_normalize(tf.reshape(Q_c, (-1, shape_i[1] // 4, self.att_n)), -1)
K_c = tf.nn.l2_normalize(tf.reshape(K_c, (-1, shape_i[1] // 4, self.att_n)), -1)
V_c = tf.nn.l2_normalize(tf.reshape(V_c, (-1, shape_i[1] // 4, self.att_n)), -1)
# Q_c = tf.reshape(Q_c, (-1, shape_i[1] // 4, self.att_n))
# K_c = tf.reshape(K_c, (-1, shape_i[1] // 4, self.att_n))
# V_c = tf.reshape(V_c, (-1, shape_i[1] // 4, self.att_n))
self.qs[i] = Q_c[0]
self.ks[i] = K_c[0]
self.vs[i] = V_c[0]

input_att = self._scaled_dot_attn(Q_c, K_c, V_c, sd_k, input_r, dotr=dotr, do_mask=do_mask, layer=i)
input_att = tf.reshape(input_att, (-1, self.att_n))

# A_c = tf.nn.softmax(tf.matmul(Q_c, K_c, transpose_b=True)/sd_k)
# # (natom x nei_type_i) x att_n
# input_att = tf.reshape(tf.matmul(A_c, V_c), (-1, self.att_n))

# (natom x nei_type_i) x out_size
input_xyz += one_layer(
input_att,
outputs_size[-1],
name='c_out',
scope=name+'/',
reuse=tf.AUTO_REUSE,
seed=self.seed,
activation_fn=None,
precision=self.filter_precision,
trainable=trainable,
uniform_seed=self.uniform_seed)
input_xyz = tf.keras.layers.LayerNormalization()(input_xyz)
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables)
input_xyz = tf.keras.layers.LayerNormalization(beta_initializer=tf.constant_initializer(self.beta[i]),
gamma_initializer=tf.constant_initializer(self.gamma[i]))(input_xyz)
# input_xyz = self._feedforward(input_xyz, outputs_size[-1], self.att_n)
return input_xyz

@@ -688,7 +690,7 @@ def _filter_lower(
# natom x nei_type_i x out_size
xyz_scatter_att = tf.reshape(
self._attention_layers(xyz_scatter, self.attn_layer, shape_i, outputs_size, input_r,
dotr=self.attn_dotr, do_mask=self.attn_mask, trainable=trainable),
dotr=self.attn_dotr, do_mask=self.attn_mask, trainable=trainable, suffix=suffix),
(-1, shape_i[1] // 4, outputs_size[-1]))
# xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1]))
else:
@@ -712,6 +714,7 @@ def _filter(
activation_fn=tf.nn.tanh,
stddev=1.0,
bavg=0.0,
suffix='',
name='linear',
reuse=None,
trainable=True):
@@ -745,6 +748,7 @@ def _filter(
stddev=stddev,
bavg=bavg,
trainable=trainable,
suffix=suffix,
name=name,
reuse=reuse,
atype=atype)
@@ -775,3 +779,31 @@ def _filter(
result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]])

return result, qmat

def init_variables(self,
graph: tf.Graph,
graph_def: tf.GraphDef,
suffix: str = "",
) -> None:
"""
Init the embedding net variables with the given dict
Parameters
----------
graph : tf.Graph
The input frozen model graph
graph_def : tf.GraphDef
The input frozen model graph_def
suffix : str, optional
The suffix of the scope
"""
super().init_variables(graph=graph, graph_def=graph_def, suffix=suffix)
self.attention_layer_variables = get_attention_layer_variables_from_graph_def(graph_def, suffix=suffix)
if self.attn_layer > 0:
self.beta[0] = self.attention_layer_variables['attention_layer_0{}/layer_normalization/beta'.format(suffix)]
self.gamma[0] = self.attention_layer_variables['attention_layer_0{}/layer_normalization/gamma'.format(suffix)]
for i in range(1, self.attn_layer):
self.beta[i] = self.attention_layer_variables[
'attention_layer_{}{}/layer_normalization_{}/beta'.format(i, suffix, i)]
self.gamma[i] = self.attention_layer_variables[
'attention_layer_{}{}/layer_normalization_{}/gamma'.format(i, suffix, i)]
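
The layer-normalization keys read above appear to follow Keras's default auto-numbering: the first LayerNormalization in the graph has no index, later ones are suffixed with their index. A small illustrative helper (layer_norm_keys is not part of the codebase) that reproduces the key layout expected by init_variables:

def layer_norm_keys(attn_layer, suffix=""):
    """Illustrative only: the beta/gamma keys read by init_variables above."""
    keys = []
    for i in range(attn_layer):
        ln = "layer_normalization" if i == 0 else "layer_normalization_{}".format(i)
        scope = "attention_layer_{}{}".format(i, suffix)
        keys.append(("{}/{}/beta".format(scope, ln), "{}/{}/gamma".format(scope, ln)))
    return keys

# layer_norm_keys(2) ->
# [('attention_layer_0/layer_normalization/beta', 'attention_layer_0/layer_normalization/gamma'),
#  ('attention_layer_1/layer_normalization_1/beta', 'attention_layer_1/layer_normalization_1/gamma')]
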
25 changes: 25 additions & 0 deletions deepmd/env.py
@@ -44,6 +44,8 @@
"TRANSFER_PATTERN",
"FITTING_NET_PATTERN",
"EMBEDDING_NET_PATTERN",
"TYPE_EMBEDDING_PATTERN",
"ATTENTION_LAYER_PATTERN",
"TF_VERSION"
]

@@ -59,18 +61,26 @@
r"filter_type_\d+/matrix_\d+_\d+|"
r"filter_type_\d+/bias_\d+_\d+|"
r"filter_type_\d+/idt_\d+_\d+|"
r"filter_type_all/matrix_\d+|"
r"filter_type_all/matrix_\d+_\d+|"
r"filter_type_all/matrix_\d+_\d+_\d+|"
r"filter_type_all/bias_\d+|"
r"filter_type_all/bias_\d+_\d+|"
r"filter_type_all/bias_\d+_\d+_\d+|"
r"filter_type_all/idt_\d+|"
r"filter_type_all/idt_\d+_\d+|"
)

FITTING_NET_PATTERN = str(
r"layer_\d+/matrix|"
r"layer_\d+_type_\d+/matrix|"
r"layer_\d+/bias|"
r"layer_\d+_type_\d+/bias|"
r"layer_\d+/idt|"
r"layer_\d+_type_\d+/idt|"
r"final_layer/matrix|"
r"final_layer_type_\d+/matrix|"
r"final_layer/bias|"
r"final_layer_type_\d+/bias|"
)

@@ -80,6 +90,21 @@
r"type_embed_net+/idt_\d+|"
)

ATTENTION_LAYER_PATTERN = str(
r"attention_layer_\d+/c_query/matrix|"
r"attention_layer_\d+/c_query/bias|"
r"attention_layer_\d+/c_key/matrix|"
r"attention_layer_\d+/c_key/bias|"
r"attention_layer_\d+/c_value/matrix|"
r"attention_layer_\d+/c_value/bias|"
r"attention_layer_\d+/c_out/matrix|"
r"attention_layer_\d+/c_out/bias|"
r"attention_layer_\d+/layer_normalization/beta|"
r"attention_layer_\d+/layer_normalization/gamma|"
r"attention_layer_\d+/layer_normalization_\d+/beta|"
r"attention_layer_\d+/layer_normalization_\d+/gamma|"
)

TRANSFER_PATTERN = \
EMBEDDING_NET_PATTERN + \
FITTING_NET_PATTERN + \
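
A quick way to sanity-check the new ATTENTION_LAYER_PATTERN is to run it against a few node names. A minimal sketch with an abridged copy of the pattern and made-up names, assuming full-string matching (the lookup helpers in deepmd.utils.graph may apply the pattern differently):

import re

# Abridged copy of the alternation above; the trailing "|" is kept as in env.py.
ATTENTION_LAYER_PATTERN = (
    r"attention_layer_\d+/c_query/matrix|"
    r"attention_layer_\d+/c_out/bias|"
    r"attention_layer_\d+/layer_normalization/beta|"
    r"attention_layer_\d+/layer_normalization_\d+/gamma|"
)

for name in [
    "attention_layer_0/c_query/matrix",               # attention weight -> match
    "attention_layer_3/layer_normalization_3/gamma",  # restored by init_variables -> match
    "filter_type_all/matrix_1",                       # embedding net -> no match
]:
    print(name, bool(re.fullmatch(ATTENTION_LAYER_PATTERN, name)))
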
17 changes: 12 additions & 5 deletions deepmd/fit/ener.py
@@ -9,6 +9,7 @@
from deepmd.utils.network import one_layer as one_layer_deepmd
from deepmd.utils.type_embed import embed_atom_type
from deepmd.utils.graph import get_fitting_net_variables_from_graph_def, load_graph_def, get_tensor_by_name_from_graph
from deepmd.utils.errors import GraphWithoutTensorError
from deepmd.fit.fitting import Fitting

from deepmd.env import global_cvt_2_tf_float
@@ -400,6 +401,8 @@ def build (self,
if input_dict is None:
input_dict = {}
bias_atom_e = self.bias_atom_e
type_embedding = input_dict.get('type_embedding', None)
atype = input_dict.get('atype', None)
if self.numb_fparam > 0:
if self.fparam_avg is None:
self.fparam_avg = 0.
@@ -418,9 +421,10 @@ def build (self,
t_daparam = tf.constant(self.numb_aparam,
name = 'daparam',
dtype = tf.int32)
self.t_bias_atom_e = tf.get_variable('t_bias_atom_e',
if type_embedding is not None:
self.t_bias_atom_e = tf.get_variable('t_bias_atom_e',
self.bias_atom_e.shape,
dtype=GLOBAL_TF_FLOAT_PRECISION,
dtype=self.fitting_precision,
trainable=False,
initializer=tf.constant_initializer(self.bias_atom_e))
if self.numb_fparam > 0:
@@ -471,9 +475,7 @@ def build (self,
aparam = tf.reshape(aparam, [-1, self.numb_aparam])
aparam = (aparam - t_aparam_avg) * t_aparam_istd
aparam = tf.reshape(aparam, [-1, self.numb_aparam * natoms[0]])

type_embedding = input_dict.get('type_embedding', None)
atype = input_dict.get('atype', None)

if type_embedding is not None:
atype_nall = tf.reshape(atype, [-1, natoms[1]])
self.atype_nloc = tf.reshape(tf.slice(atype_nall, [0, 0], [-1, natoms[0]]), [-1]) ## lammps will make error
@@ -570,6 +572,11 @@ def init_variables(self,
if self.numb_aparam > 0:
self.aparam_avg = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_aparam_avg' % suffix)
self.aparam_inv_std = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_aparam_istd' % suffix)
try:
self.bias_atom_e = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_bias_atom_e' % suffix)
except GraphWithoutTensorError:
# model without type_embedding has no t_bias_atom_e
pass

def enable_compression(self,
model_file: str,
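
The try/except added to init_variables makes restoring the bias optional: frozen models produced without type embedding never stored fitting_attr/t_bias_atom_e, so the lookup is allowed to fail quietly. A minimal sketch of the same pattern as a standalone helper (try_get_tensor is illustrative, not part of the codebase):

from deepmd.utils.graph import get_tensor_by_name_from_graph
from deepmd.utils.errors import GraphWithoutTensorError

def try_get_tensor(graph, name, default=None):
    # Illustrative: return the stored value if the frozen graph has the tensor,
    # otherwise fall back to a default instead of raising.
    try:
        return get_tensor_by_name_from_graph(graph, name)
    except GraphWithoutTensorError:
        return default

# usage sketch: bias = try_get_tensor(graph, "fitting_attr/t_bias_atom_e")
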
9 changes: 5 additions & 4 deletions deepmd/train/trainer.py
@@ -288,7 +288,8 @@ def _init_param(self, jdata):

def build (self,
data = None,
stop_batch = 0) :
stop_batch = 0,
suffix = "") :
self.ntypes = self.model.get_ntypes()
self.stop_batch = stop_batch

@@ -348,7 +349,7 @@ def build (self,
self.fitting.enable_mixed_precision(self.mixed_prec)

self._build_lr()
self._build_network(data)
self._build_network(data, suffix)
self._build_training()


@@ -358,7 +359,7 @@ def _build_lr(self):
self.learning_rate = self.lr.build(self.global_step, self.stop_batch)
log.info("built lr")

def _build_network(self, data):
def _build_network(self, data, suffix=""):
self.place_holders = {}
if self.is_compress :
for kk in ['coord', 'box']:
@@ -379,7 +380,7 @@ def _build_network(self, data, suffix=""):
self.place_holders['default_mesh'],
self.place_holders,
self.frz_model,
suffix = "",
suffix = suffix,
reuse = False)

self.l2_l, self.l2_more\
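
Threading suffix from the trainer's build down to model.build lets the caller pick the name scope under which the network is created, which the new init_frz_model unit tests presumably rely on. A usage sketch in which trainer and train_data are assumed to be set up as usual:

# Hypothetical sketch: an already-constructed trainer now accepts a scope
# suffix when building the graph, instead of always using the default "".
trainer.build(train_data, stop_batch=1000, suffix="_init_frz_test")
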
6 changes: 3 additions & 3 deletions deepmd/utils/argcheck.py
@@ -276,9 +276,9 @@ def descrpt_se_atten_args():
Argument("seed", [int, None], optional=True, doc=doc_seed),
Argument("exclude_types", list, optional=True, default=[], doc=doc_exclude_types),
Argument("set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero),
Argument("attn", int, optional=True, default=100, doc=doc_attn),
Argument("attn_layer", int, optional=True, default=4, doc=doc_attn_layer),
Argument("attn_dotr", bool, optional=True, default=False, doc=doc_attn_dotr),
Argument("attn", int, optional=True, default=128, doc=doc_attn),
Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer),
Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr),
Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask)
]

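
With the corrected defaults, an se_atten descriptor whose attention keys are left unspecified now resolves to attn=128, attn_layer=2, attn_dotr=True, attn_mask=False. A sketch of the "descriptor" section of a training input with those values written out explicitly; the non-attention keys are illustrative:

descriptor = {
    "type": "se_atten",
    "sel": 120,              # illustrative
    "rcut": 6.0,             # illustrative
    "rcut_smth": 0.5,        # illustrative
    "neuron": [25, 50, 100], # illustrative
    "attn": 128,             # default after this fix (was 100)
    "attn_layer": 2,         # default after this fix (was 4)
    "attn_dotr": True,       # default after this fix (was False)
    "attn_mask": False,      # unchanged default
}
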