Skip to content

Commit

Permalink
Modify Roformer Doc (PaddlePaddle#1104)
Browse files Browse the repository at this point in the history
* modify transformer-rst

* modify roformer tokenizer

* modify roformer model

* update

* modify transformer

* modify roformer modeling

* modify decoder

* update

* modify tokenizer

* modify token_embedding
  • Loading branch information
huhuiwen99 authored Oct 10, 2021
1 parent 081e285 commit 04de795
Show file tree
Hide file tree
Showing 7 changed files with 555 additions and 114 deletions.
39 changes: 37 additions & 2 deletions paddlenlp/embeddings/token_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,14 @@ def search(self, words):
Returns:
`numpy.array`: The vectors of specifying words.
Examples:
.. code-block::
from paddlenlp.embeddings import TokenEmbedding
embed = TokenEmbedding()
vector = embed.search('Welcome to use PaddlePaddle and PaddleNLP!')
"""
idx_list = self.get_idx_list_from_words(words)
idx_tensor = paddle.to_tensor(idx_list)
Expand Down Expand Up @@ -271,6 +279,15 @@ def get_idx_list_from_words(self, words):
Returns:
`list`: The indexes list of specifying words.
Examples:
.. code-block::
from paddlenlp.embeddings import TokenEmbedding
embed = TokenEmbedding()
index = embed.get_idx_from_word('Welcome to use PaddlePaddle and PaddleNLP!')
#635963
"""
if isinstance(words, str):
idx_list = [self.get_idx_from_word(words)]
Expand Down Expand Up @@ -305,7 +322,16 @@ def dot(self, word_a, word_b):
word_b (`str`): The second word string.
Returns:
`Float`: The dot product of 2 words.
float: The dot product of 2 words.
Examples:
.. code-block::
from paddlenlp.embeddings import TokenEmbedding
embed = TokenEmbedding()
dot_product = embed.dot('PaddlePaddle', 'PaddleNLP!')
#0.11827179
"""
dot = self._dot_np
Expand All @@ -321,7 +347,16 @@ def cosine_sim(self, word_a, word_b):
word_b (`str`): The second word string.
Returns:
`Float`: The cosine similarity of 2 words.
float: The cosine similarity of 2 words.
Examples:
.. code-block::
from paddlenlp.embeddings import TokenEmbedding
embed = TokenEmbedding()
cosine_simi = embed.cosine_sim('PaddlePaddle', 'PaddleNLP!')
#0.99999994
"""
dot = self._dot_np
Expand Down
2 changes: 1 addition & 1 deletion paddlenlp/transformers/bert/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ def forward(self,
tokenizer = BertTokenizer.from_pretrained('bert-wwm-chinese')
model = BertModel.from_pretrained('bert-wwm-chinese')
inputs = tokenizer("欢迎使用百度飞浆!")
inputs = tokenizer("欢迎使用百度飞桨!")
inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
output = model(**inputs)
'''
Expand Down
Loading

0 comments on commit 04de795

Please sign in to comment.