Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decouple the activation function type from the model compression process in SE_A; tanh and gelu are now both available. #1020

Merged
merged 8 commits into from
Aug 27, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion deepmd/descriptor/se_a.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def __init__ (self,
self.uniform_seed = uniform_seed
self.seed_shift = embedding_net_rand_seed_shift(self.filter_neuron)
self.trainable = trainable
self.compress_activation_fn = get_activation_func(activation_function)
self.filter_activation_fn = get_activation_func(activation_function)
self.filter_precision = get_precision(precision)
self.filter_np_precision = get_np_precision(precision)
Expand Down Expand Up @@ -316,7 +317,8 @@ def enable_compression(self,
The overflow check frequency
"""
self.compress = True
self.table = DPTabulate(model_file, self.type_one_side, self.exclude_types)
self.table = DPTabulate(
model_file, self.type_one_side, self.exclude_types, self.compress_activation_fn)
self.table_config = [table_extrapolate, table_stride_1, table_stride_2, check_frequency]
self.lower, self.upper \
= self.table.build(min_nbor_dist,
Expand Down
49 changes: 37 additions & 12 deletions deepmd/utils/tabulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import math
import logging
import numpy as np
from typing import Callable
from typing import Tuple, List
from deepmd.env import tf
from deepmd.env import op_module
from deepmd.common import ACTIVATION_FN_DICT
from deepmd.utils.sess import run_sess
from deepmd.utils.graph import get_tensor_by_name_from_graph, load_graph_def
from deepmd.utils.graph import get_embedding_net_nodes_from_graph_def
Expand All @@ -30,11 +32,14 @@ class DPTabulate():
exclude_types : List[List[int]]
The excluded pairs of types which have no interaction with each other.
For example, `[[0, 1]]` means no interaction between type 0 and type 1.
activation_fn : Callable[[tf.Tensor], tf.Tensor]
The activation function in the embedding net. Supported options are {"tanh","gelu"} in common.ACTIVATION_FN_DICT.
"""
def __init__(self,
model_file : str,
type_one_side : bool = False,
exclude_types : List[List[int]] = []) -> None:
exclude_types : List[List[int]] = [],
activation_fn : Callable[[tf.Tensor], tf.Tensor] = tf.nn.tanh) -> None:
"""
Constructor
"""
Expand All @@ -44,6 +49,15 @@ def __init__(self,
self.exclude_types = exclude_types
if self.type_one_side and len(self.exclude_types) != 0:
raise RunTimeError('"type_one_side" is not compatible with "exclude_types"')

# functype
if activation_fn == ACTIVATION_FN_DICT["tanh"]:
self.functype = 1
elif activation_fn == ACTIVATION_FN_DICT["gelu"]:
self.functype = 2
else:
raise RunTimeError("Unknown actication function type!")
Comment on lines +53 to +59
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should support all activation functions in ACTIVATION_FN_DICT

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it will be done in future work.

self.activation_fn = activation_fn

self.graph, self.graph_def = load_graph_def(self.model_file)
self.sess = tf.Session(graph = self.graph)
Expand Down Expand Up @@ -199,26 +213,37 @@ def _make_data(self, xx, idx):
xx = tf.reshape(xx, [xx.size, -1])
for layer in range(self.layer_size):
if layer == 0:
yy = self._layer_0(xx, self.matrix["layer_" + str(layer + 1)][idx], self.bias["layer_" + str(layer + 1)][idx])
dy = op_module.unaggregated_dy_dx_s(yy, self.matrix["layer_" + str(layer + 1)][idx])
dy2 = op_module.unaggregated_dy2_dx_s(yy, dy, self.matrix["layer_" + str(layer + 1)][idx])
xbar = tf.matmul(
xx, self.matrix["layer_" + str(layer + 1)][idx]) + self.bias["layer_" + str(layer + 1)][idx]
yy = self._layer_0(
xx, self.matrix["layer_" + str(layer + 1)][idx], self.bias["layer_" + str(layer + 1)][idx])
dy = op_module.unaggregated_dy_dx_s(
yy, self.matrix["layer_" + str(layer + 1)][idx], xbar, tf.constant(self.functype))
dy2 = op_module.unaggregated_dy2_dx_s(
yy, dy, self.matrix["layer_" + str(layer + 1)][idx], xbar, tf.constant(self.functype))
else:
tt, yy = self._layer_1(yy, self.matrix["layer_" + str(layer + 1)][idx], self.bias["layer_" + str(layer + 1)][idx])
dz = op_module.unaggregated_dy_dx(yy - tt, self.matrix["layer_" + str(layer + 1)][idx], dy)
dy2 = op_module.unaggregated_dy2_dx(yy - tt, self.matrix["layer_" + str(layer + 1)][idx], dz, dy, dy2)
ybar = tf.matmul(
yy, self.matrix["layer_" + str(layer + 1)][idx]) + self.bias["layer_" + str(layer + 1)][idx]
tt, zz = self._layer_1(
yy, self.matrix["layer_" + str(layer + 1)][idx], self.bias["layer_" + str(layer + 1)][idx])
dz = op_module.unaggregated_dy_dx(
zz - tt, self.matrix["layer_" + str(layer + 1)][idx], dy, ybar, tf.constant(self.functype))
dy2 = op_module.unaggregated_dy2_dx(
zz - tt, self.matrix["layer_" + str(layer + 1)][idx], dy, dy2, ybar, tf.constant(self.functype))
dy = dz

vv = yy.eval()
yy = zz

vv = zz.eval()
dd = dy.eval()
d2 = dy2.eval()
return vv, dd, d2

def _layer_0(self, x, w, b):
return tf.nn.tanh(tf.matmul(x, w) + b)
return self.activation_fn(tf.matmul(x, w) + b)

def _layer_1(self, x, w, b):
t = tf.concat([x, x], axis = 1)
return t, tf.nn.tanh(tf.matmul(x, w) + b) + t
t = tf.concat([x, x], axis=1)
return t, self.activation_fn(tf.matmul(x, w) + b) + t

def _save_data(self):
for ii in range(self.ntypes * self.ntypes):
Expand Down
Loading