From 34bfef282a2478c061238ff2ce77488fecf3c56a Mon Sep 17 00:00:00 2001 From: Haoran Li Date: Mon, 26 Nov 2018 16:33:03 -0800 Subject: [PATCH] onnx bi-transformer (#385) Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/385 Pull Request resolved: https://github.com/facebookresearch/pytext/pull/6 Pull Request resolved: https://github.com/pytorch/pytorch/pull/14292 Differential Revision: D10517864 fbshipit-source-id: d491b91703461baae69c8c9a1d52d9bcfda75528 --- pytext/models/model.py | 9 +++++++++ pytext/utils/onnx_utils.py | 28 +++++++++++++++++----------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/pytext/models/model.py b/pytext/models/model.py index 78963e6bf..8cbb462ec 100644 --- a/pytext/models/model.py +++ b/pytext/models/model.py @@ -118,3 +118,12 @@ def get_model_params_for_optimizer( dense_grads_params[name] = param return sparse_grads_params, dense_grads_params + + def prepare_for_onnx_export_(self, **kwargs): + """Make model exportable via ONNX trace.""" + + def apply_prepare_for_onnx_export_(module): + if module != self and hasattr(module, "prepare_for_onnx_export_"): + module.prepare_for_onnx_export_(**kwargs) + + self.apply(apply_prepare_for_onnx_export_) diff --git a/pytext/utils/onnx_utils.py b/pytext/utils/onnx_utils.py index 693c9b1fa..a5f88bc43 100644 --- a/pytext/utils/onnx_utils.py +++ b/pytext/utils/onnx_utils.py @@ -54,22 +54,28 @@ def create_vocab_index(vocab_list, net, net_workspace, index_name): return vocab_index +def create_vocab_indices_map(c2_prepared, init_net, vocab_map): + vocab_indices = {} + for feat_name, vocab in vocab_map.items(): + assert len(vocab) > 1 + vocab_indices[feat_name] = create_vocab_index( + # Skip index 0 as it is reserved for unknown tokens + # in Caffe2's index implementation + np.array(vocab[1:], dtype=str), + init_net, + c2_prepared.workspace, + feat_name + "_index", + ) + return vocab_indices + + def add_feats_numericalize_ops(c2_prepared, vocab_map, input_names): 
predict_net = c2_prepared.predict_net # Protobuf of the predict_net init_net = core.Net(c2_prepared.init_net) final_input_names = input_names.copy() with c2_prepared.workspace._ctx: - vocab_indices = {} - for feat_name, vocab in vocab_map.items(): - assert len(vocab) > 1 - vocab_indices[feat_name] = create_vocab_index( - # Skip index 0 as it is reserved for unkwon tokens - # in Caffe2's index implementation - np.array(vocab[1:], dtype=str), - init_net, - c2_prepared.workspace, - feat_name + "_index", - ) + vocab_indices = create_vocab_indices_map(c2_prepared, init_net, vocab_map) + # Add operators to convert string features to ids based on the vocab final_predict_net = core.Net(c2_prepared.predict_net.name + "_processed") final_inputs = set(