From 7f2b9a0c44e23e65fd42136985e66ca70de97b32 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Wed, 31 Aug 2022 07:47:31 +0000 Subject: [PATCH 1/6] add t5-encoder-model --- paddlenlp/transformers/t5/modeling.py | 78 +++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/paddlenlp/transformers/t5/modeling.py b/paddlenlp/transformers/t5/modeling.py index 402bdfc95b24..815cd300e40b 100644 --- a/paddlenlp/transformers/t5/modeling.py +++ b/paddlenlp/transformers/t5/modeling.py @@ -15,9 +15,11 @@ # limitations under the License. import math +from typing import Optional, Tuple import numpy as np import paddle +from paddle.tensor.tensor import Tensor import paddle.nn as nn import paddle.nn.functional as F @@ -26,9 +28,8 @@ from ..nezha.modeling import ACT2FN __all__ = [ - 'T5Model', - "T5PretrainedModel", - 'T5ForConditionalGeneration', + 'T5Model', "T5PretrainedModel", 'T5ForConditionalGeneration', + 'T5EncoderModel' ] @@ -1659,3 +1660,74 @@ def __getattr__(self, name): return getattr(self, self.base_model_prefix).config[name] except KeyError: raise e + + +@register_base_model +class T5EncoderModel(T5PretrainedModel): + + def __init__(self, + vocab_size=32128, + d_model=768, + d_kv=64, + d_ff=3072, + num_layers=12, + num_heads=12, + relative_attention_num_buckets=32, + dropout_rate=0.1, + layer_norm_epsilon=1e-06, + feed_forward_proj="relu", + is_decoder: bool = False, + **kwargs): + super().__init__() + self.shared = nn.Embedding(vocab_size, d_model) + + self.use_cache = False + self.is_encoder_decoder = False + self.encoder = T5Stack(d_model, + num_layers, + layer_norm_epsilon, + dropout_rate, + relative_attention_num_buckets, + d_kv, + num_heads, + feed_forward_proj, + d_ff, + embed_tokens=self.shared, + is_decoder=is_decoder) + + # Initialize weights and apply final processing + self.init_weights() + + def get_input_embeddings(self): + return self.shared + + def set_input_embeddings(self, new_embeddings): + self.shared = new_embeddings + self.encoder.set_input_embeddings(new_embeddings) + + def get_encoder(self): + return self.encoder + + def forward( + self, + input_ids: Tensor = None, + attention_mask: Optional[Tensor] = None, + encoder_hidden_states: Optional[Tuple[Tensor]] = None, + encoder_attention_mask: Optional[Tensor] = None, + cache=None, + use_cache: Optional[bool] = False, + output_attentions: Optional[bool] = False, + output_hidden_states: Optional[bool] = False, + ): + encoder_outputs = self.encoder( + input_ids=input_ids, + attention_mask=attention_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + cache=cache, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + + return encoder_outputs From d41c816510930c3ac1d5054675c725318e8fbf80 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Wed, 31 Aug 2022 07:59:33 +0000 Subject: [PATCH 2/6] update t5model --- paddlenlp/transformers/t5/modeling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlenlp/transformers/t5/modeling.py b/paddlenlp/transformers/t5/modeling.py index 815cd300e40b..f7ec19e25bd0 100644 --- a/paddlenlp/transformers/t5/modeling.py +++ b/paddlenlp/transformers/t5/modeling.py @@ -19,7 +19,7 @@ import numpy as np import paddle -from paddle.tensor.tensor import Tensor +from paddle import Tensor import paddle.nn as nn import paddle.nn.functional as F From ea3f876d0e0c37bc920c613604deb2843788afcf Mon Sep 17 00:00:00 2001 
From: wj-Mcat <1435130236@qq.com> Date: Thu, 15 Sep 2022 10:06:30 +0800 Subject: [PATCH 3/6] update t5encoder & test modeling --- paddlenlp/transformers/t5/modeling.py | 33 ++++++++++++++++++++- tests/transformers/t5/test_modeling.py | 14 +++++---- tests/transformers/test_generation_utils.py | 1 + 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/paddlenlp/transformers/t5/modeling.py b/paddlenlp/transformers/t5/modeling.py index 21cee05e0fa3..a6ec750c3f90 100644 --- a/paddlenlp/transformers/t5/modeling.py +++ b/paddlenlp/transformers/t5/modeling.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import math from typing import Optional, Tuple @@ -1733,8 +1734,8 @@ def __getattr__(self, name): raise e -@register_base_model class T5EncoderModel(T5PretrainedModel): + base_model_class = None def __init__(self, vocab_size=32128, @@ -1750,6 +1751,20 @@ def __init__(self, is_decoder: bool = False, **kwargs): super().__init__() + self.config = { + "vocab_size": vocab_size, + "d_model": d_model, + "d_kv": d_kv, + "d_ff": d_ff, + "num_layers": num_layers, + "num_heads": num_heads, + "relative_attention_num_buckets": relative_attention_num_buckets, + "dropout_rate": dropout_rate, + "layer_norm_epsilon": layer_norm_epsilon, + "feed_forward_proj": feed_forward_proj, + "is_decoder": is_decoder, + } + self.config.update(kwargs) self.shared = nn.Embedding(vocab_size, d_model) self.use_cache = False @@ -1769,6 +1784,19 @@ def __init__(self, # Initialize weights and apply final processing self.init_weights() + def _post_init(self, *args, **kwargs): + """ + **prevent the `config` property to be assigned** + + It would be hooked after `__init__` to add a dict including arguments of + `__init__` as a attribute named `config` of the pretrained model instance. 
+ """ + pass + + @property + def t5(self): + return self + def get_input_embeddings(self): return self.shared @@ -1802,3 +1830,6 @@ def forward( ) return encoder_outputs + + +T5EncoderModel.base_model_class = T5EncoderModel diff --git a/tests/transformers/t5/test_modeling.py b/tests/transformers/t5/test_modeling.py index 8ca7c882e29e..14b14ea21773 100644 --- a/tests/transformers/t5/test_modeling.py +++ b/tests/transformers/t5/test_modeling.py @@ -21,11 +21,14 @@ from tests.testing_utils import slow -from ..test_generation_utils import GenerationTesterMixin -from ..test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor +# from ..test_generation_utils import GenerationTesterMixin +# from ..test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor + +from tests.transformers.test_generation_utils import GenerationTesterMixin +from tests.transformers.test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor import paddle -from paddlenlp.transformers import T5ForConditionalGeneration, T5Model, T5Tokenizer +from paddlenlp.transformers import T5ForConditionalGeneration, T5Model, T5Tokenizer, T5EncoderModel from paddlenlp.transformers.t5.modeling import T5_PRETRAINED_MODEL_ARCHIVE_LIST @@ -500,9 +503,10 @@ def prepare_config_and_inputs_for_common(self): class T5ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase): base_model_class = T5Model - all_model_classes = (T5Model, T5ForConditionalGeneration) + all_model_classes = (T5Model, T5ForConditionalGeneration, T5EncoderModel) all_generative_model_classes = {T5ForConditionalGeneration: (T5Model, "t5")} - all_parallelizable_model_classes = (T5Model, T5ForConditionalGeneration) + all_parallelizable_model_classes = (T5Model, T5ForConditionalGeneration, + T5EncoderModel) fx_compatible = True test_pruning = False test_resize_embeddings = True diff --git a/tests/transformers/test_generation_utils.py b/tests/transformers/test_generation_utils.py index c6031f641971..06cb23b3646e 100644 --- a/tests/transformers/test_generation_utils.py +++ b/tests/transformers/test_generation_utils.py @@ -498,6 +498,7 @@ def test_sample_generate(self): output_generate[0].tolist()) def test_beam_search_generate(self): + paddle.seed(100) for model_class in self.all_generative_model_classes.keys(): config, input_ids, attention_mask, max_length = self._get_input_ids_and_config( ) From a097681532931f455054a03cc3da635a0e0abd7d Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Fri, 16 Sep 2022 16:20:25 +0800 Subject: [PATCH 4/6] update t5 --- paddlenlp/transformers/t5/modeling.py | 7 +++++-- tests/transformers/t5/test_modeling.py | 7 ++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/paddlenlp/transformers/t5/modeling.py b/paddlenlp/transformers/t5/modeling.py index a6ec750c3f90..1ad1426de0bb 100644 --- a/paddlenlp/transformers/t5/modeling.py +++ b/paddlenlp/transformers/t5/modeling.py @@ -15,7 +15,7 @@ from __future__ import annotations import math -from typing import Optional, Tuple +from typing import Optional, Tuple, Union import numpy as np import paddle @@ -1734,6 +1734,9 @@ def __getattr__(self, name): raise e +from paddle.nn.layer.transformer import MultiHeadAttention + + class T5EncoderModel(T5PretrainedModel): base_model_class = None @@ -1813,7 +1816,7 @@ def forward( attention_mask: Optional[Tensor] = None, encoder_hidden_states: Optional[Tuple[Tensor]] = None, encoder_attention_mask: Optional[Tensor] = None, - cache=None, + cache: Optional[MultiHeadAttention.Cache] = None, 
use_cache: Optional[bool] = False, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, diff --git a/tests/transformers/t5/test_modeling.py b/tests/transformers/t5/test_modeling.py index 14b14ea21773..d76e1705dbb0 100644 --- a/tests/transformers/t5/test_modeling.py +++ b/tests/transformers/t5/test_modeling.py @@ -21,11 +21,8 @@ from tests.testing_utils import slow -# from ..test_generation_utils import GenerationTesterMixin -# from ..test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor - -from tests.transformers.test_generation_utils import GenerationTesterMixin -from tests.transformers.test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor +from ..test_generation_utils import GenerationTesterMixin +from ..test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor import paddle from paddlenlp.transformers import T5ForConditionalGeneration, T5Model, T5Tokenizer, T5EncoderModel From 998fbc3b3a1a283233c15663bf5069518ef8217b Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Fri, 16 Sep 2022 16:32:35 +0800 Subject: [PATCH 5/6] update type hinting --- paddlenlp/transformers/t5/modeling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddlenlp/transformers/t5/modeling.py b/paddlenlp/transformers/t5/modeling.py index 1ad1426de0bb..ae8c35baa7a4 100644 --- a/paddlenlp/transformers/t5/modeling.py +++ b/paddlenlp/transformers/t5/modeling.py @@ -15,7 +15,7 @@ from __future__ import annotations import math -from typing import Optional, Tuple, Union +from typing import Optional, Tuple, Union, List import numpy as np import paddle @@ -1816,7 +1816,7 @@ def forward( attention_mask: Optional[Tensor] = None, encoder_hidden_states: Optional[Tuple[Tensor]] = None, encoder_attention_mask: Optional[Tensor] = None, - cache: Optional[MultiHeadAttention.Cache] = None, + cache: Optional[List[MultiHeadAttention.Cache]] = None, use_cache: Optional[bool] = False, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, From 2760b16f8f85c4397703de50fbb7018fc89cba41 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Tue, 20 Sep 2022 13:09:21 +0800 Subject: [PATCH 6/6] update cache type annotation --- paddlenlp/transformers/t5/modeling.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/paddlenlp/transformers/t5/modeling.py b/paddlenlp/transformers/t5/modeling.py index ae8c35baa7a4..db228d4cedd8 100644 --- a/paddlenlp/transformers/t5/modeling.py +++ b/paddlenlp/transformers/t5/modeling.py @@ -1734,9 +1734,6 @@ def __getattr__(self, name): raise e -from paddle.nn.layer.transformer import MultiHeadAttention - - class T5EncoderModel(T5PretrainedModel): base_model_class = None @@ -1816,7 +1813,7 @@ def forward( attention_mask: Optional[Tensor] = None, encoder_hidden_states: Optional[Tuple[Tensor]] = None, encoder_attention_mask: Optional[Tensor] = None, - cache: Optional[List[MultiHeadAttention.Cache]] = None, + cache=None, use_cache: Optional[bool] = False, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False,
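
For readers who want to see the class added by this series in action, below is a minimal usage sketch (not part of the patches themselves). It assumes the `T5EncoderModel` constructor defaults and the `paddlenlp.transformers` import path shown in the diffs above, and it assumes the encoder returns its outputs as a tuple whose first element is the final hidden states, as the `T5Stack` usage elsewhere in `modeling.py` suggests; the randomly generated token ids and the shape check are purely illustrative.

```python
import paddle
from paddlenlp.transformers import T5EncoderModel

# Build the encoder-only model with hyper-parameters mirroring the defaults
# declared in the patch (t5-base sized: d_model=768, 12 layers, 12 heads).
model = T5EncoderModel(
    vocab_size=32128,
    d_model=768,
    d_kv=64,
    d_ff=3072,
    num_layers=12,
    num_heads=12,
)
model.eval()

# Toy inputs; in practice the ids would come from T5Tokenizer.
input_ids = paddle.randint(low=0, high=32128, shape=[2, 16], dtype="int64")
attention_mask = paddle.ones(shape=[2, 16], dtype="int64")

with paddle.no_grad():
    encoder_outputs = model(input_ids=input_ids, attention_mask=attention_mask)

# forward() passes straight through to the underlying T5Stack; with use_cache,
# output_attentions and output_hidden_states all False, the first element is
# assumed to be the final hidden states of shape [batch_size, seq_len, d_model].
last_hidden_state = encoder_outputs[0]
print(last_hidden_state.shape)  # expected: [2, 16, 768]
```

Two design details from the final state of the series are worth noting: `T5EncoderModel` assigns `base_model_class` to itself and overrides `_post_init` as a no-op so the `config` dict it builds in `__init__` is not overwritten by the base-class hook, and the `MultiHeadAttention.Cache` type annotation on the `cache` argument introduced in PATCH 4/6 and 5/6 is reverted to a plain `cache=None` in PATCH 6/6, which also removes the mid-module `MultiHeadAttention` import.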