feat(cookbook): add onnx_runtime snippet executor (#2498)
numb3r3 authored May 28, 2021
1 parent 848e023 commit cbd4787
Showing 1 changed file with 53 additions and 0 deletions: `.github/2.0/cookbooks/Executor.md`
@@ -49,6 +49,7 @@ Table of Contents
- [MindSpore](#mindspore)
- [Scikit-learn](#scikit-learn)
- [PyTorch](#pytorch)
- [ONNX-Runtime](#onnx-runtime)

<!-- END doctoc generated TOC please keep comment here to allow auto update -->

@@ -779,3 +780,55 @@ class PytorchMwuExecutor(Executor):
input_tensor) # multiply the input with the encoding matrix.
doc.embedding = output_tensor.numpy() # assign the encoding results to ``embedding``
```

### ONNX-Runtime

The code snippet below converts a `PyTorch` model to ONNX and leverages `onnxruntime` to run inference on models from Hugging Face `transformers`.

```python
from pathlib import Path

import numpy as np
import onnxruntime
from jina import Executor, requests
from transformers import BertTokenizerFast, convert_graph_to_onnx


class ONNXBertExecutor(Executor):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # export the Hugging Face model to ONNX
        convert_graph_to_onnx.convert(
            framework="pt",
            model="bert-base-cased",
            output=Path("onnx/bert-base-cased.onnx"),
            opset=11,
        )

        # create the tokenizer
        self.tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")

        # create the inference session
        options = onnxruntime.SessionOptions()
        options.intra_op_num_threads = 1  # has an impact on performance
        options.graph_optimization_level = (
            onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
        )

        # load the model as a graph and prepare the CPU backend
        self.session = onnxruntime.InferenceSession(
            "onnx/bert-base-cased.onnx", options
        )
        self.session.disable_fallback()

    @requests
    def encode(self, docs, **kwargs):
        for doc in docs:
            # tokenize the text and batch the inputs for the ONNX graph
            tokens = self.tokenizer.encode_plus(doc.text)
            inputs = {name: np.atleast_2d(value) for name, value in tokens.items()}
            # run inference; the model returns (last_hidden_state, pooler_output)
            output, pooled = self.session.run(None, inputs)
            # assign the encoding results to ``embedding``
            doc.embedding = pooled[0]
```
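
Like any other Executor, it can be plugged into a Flow. Below is a minimal usage sketch, assuming Jina 2.0's Flow API; the endpoint name `/index`, the sample text, and the `print` callback are illustrative:

```python
from jina import Document, Flow

f = Flow().add(uses=ONNXBertExecutor)

with f:
    # send a Document through the Flow; the callback prints the response,
    # whose Documents carry the ONNX-computed ``embedding``
    f.post(on='/index', inputs=Document(text='hello world'), on_done=print)
```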
