Skip to content

Commit

Permalink
Modify Roformer Doc (PaddlePaddle#1104)
Browse files Browse the repository at this point in the history
* modify transformer-rst

* modify roformer tokenizer

* modify roformer model

* update

* modify transformer

* modify roformer modeling

* modify decoder

* update

* modify tokenizer

* modify token_embedding
  • Loading branch information
huhuiwen99 authored Oct 10, 2021
1 parent 081e285 commit 04de795
Show file tree
Hide file tree
Showing 7 changed files with 555 additions and 114 deletions.
39 changes: 37 additions & 2 deletions paddlenlp/embeddings/token_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,14 @@ def search(self, words):
Returns:
`numpy.array`: The vectors of specifying words.
Examples:
.. code-block::
from paddlenlp.embeddings import TokenEmbedding
embed = TokenEmbedding()
vector = embed.search('Welcome to use PaddlePaddle and PaddleNLP!')
"""
idx_list = self.get_idx_list_from_words(words)
idx_tensor = paddle.to_tensor(idx_list)
Expand Down Expand Up @@ -271,6 +279,15 @@ def get_idx_list_from_words(self, words):
Returns:
`list`: The indexes list of specifying words.
Examples:
.. code-block::
from paddlenlp.embeddings import TokenEmbedding
embed = TokenEmbedding()
index = embed.get_idx_from_word('Welcome to use PaddlePaddle and PaddleNLP!')
#635963
"""
if isinstance(words, str):
idx_list = [self.get_idx_from_word(words)]
Expand Down Expand Up @@ -305,7 +322,16 @@ def dot(self, word_a, word_b):
word_b (`str`): The second word string.
Returns:
`Float`: The dot product of 2 words.
float: The dot product of 2 words.
Examples:
.. code-block::
from paddlenlp.embeddings import TokenEmbedding
embed = TokenEmbedding()
dot_product = embed.dot('PaddlePaddle', 'PaddleNLP!')
#0.11827179
"""
dot = self._dot_np
Expand All @@ -321,7 +347,16 @@ def cosine_sim(self, word_a, word_b):
word_b (`str`): The second word string.
Returns:
`Float`: The cosine similarity of 2 words.
float: The cosine similarity of 2 words.
Examples:
.. code-block::
from paddlenlp.embeddings import TokenEmbedding
embed = TokenEmbedding()
cosine_simi = embed.cosine_sim('PaddlePaddle', 'PaddleNLP!')
#0.99999994
"""
dot = self._dot_np
Expand Down
2 changes: 1 addition & 1 deletion paddlenlp/transformers/bert/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ def forward(self,
tokenizer = BertTokenizer.from_pretrained('bert-wwm-chinese')
model = BertModel.from_pretrained('bert-wwm-chinese')
inputs = tokenizer("欢迎使用百度飞浆!")
inputs = tokenizer("欢迎使用百度飞桨!")
inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
output = model(**inputs)
'''
Expand Down
Loading

0 comments on commit 04de795

Please sign in to comment.