From 93624bfee9121563fe56fb111d53af557878ee83 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Wed, 22 Sep 2021 15:14:55 +0200 Subject: [PATCH] Fix non-negligible difference between GPT2 and TFGPT2 (#13679) Co-authored-by: ydshieh --- src/transformers/models/gpt2/modeling_tf_gpt2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index 32ee341814b51e..71ff12c3296a61 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -174,7 +174,7 @@ def __init__(self, n_state, config, **kwargs): nx = config.n_embd self.c_fc = TFConv1D(n_state, nx, initializer_range=config.initializer_range, name="c_fc") self.c_proj = TFConv1D(nx, n_state, initializer_range=config.initializer_range, name="c_proj") - self.act = get_tf_activation("gelu") + self.act = get_tf_activation(config.activation_function) self.dropout = tf.keras.layers.Dropout(config.resid_pdrop) def call(self, x, training=False):