From 97c50457ead7e6d58b7ac4dee388ef056d321d83 Mon Sep 17 00:00:00 2001
From: I like data
Date: Fri, 20 May 2022 01:09:21 +0530
Subject: [PATCH] Fixes #17128. VisibleDeprecationWarning is addressed by
 specifying dtype=object when creating numpy array.

Update code based on review feedback.
Undo whitespace changes to tokenization_utils_base.py.
---
 src/transformers/pipelines/question_answering.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/transformers/pipelines/question_answering.py b/src/transformers/pipelines/question_answering.py
index d6f23262d2ab20..0f5fbf0370e708 100644
--- a/src/transformers/pipelines/question_answering.py
+++ b/src/transformers/pipelines/question_answering.py
@@ -279,7 +279,6 @@ def preprocess(self, example, padding="do_not_pad", doc_stride=None, max_questio
                 truncation="only_second" if question_first else "only_first",
                 max_length=max_seq_len,
                 stride=doc_stride,
-                return_tensors="np",
                 return_token_type_ids=True,
                 return_overflowing_tokens=True,
                 return_offsets_mapping=True,
@@ -294,12 +293,10 @@ def preprocess(self, example, padding="do_not_pad", doc_stride=None, max_questio
 
             # p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer)
             # We put 0 on the tokens from the context and 1 everywhere else (question and special tokens)
-            p_mask = np.asarray(
-                [
-                    [tok != 1 if question_first else 0 for tok in encoded_inputs.sequence_ids(span_id)]
-                    for span_id in range(num_spans)
-                ]
-            )
+            p_mask = [
+                [tok != 1 if question_first else 0 for tok in encoded_inputs.sequence_ids(span_id)]
+                for span_id in range(num_spans)
+            ]
 
             features = []
             for span_idx in range(num_spans):
@@ -316,8 +313,6 @@ def preprocess(self, example, padding="do_not_pad", doc_stride=None, max_questio
                     for cls_index in cls_indices:
                         p_mask[span_idx][cls_index] = 0
                 submask = p_mask[span_idx]
-                if isinstance(submask, np.ndarray):
-                    submask = submask.tolist()
                 features.append(
                     SquadFeatures(
                         input_ids=input_ids_span_idx,
@@ -344,7 +339,7 @@ def preprocess(self, example, padding="do_not_pad", doc_stride=None, max_questio
         for i, feature in enumerate(features):
            fw_args = {}
            others = {}
-           model_input_names = self.tokenizer.model_input_names + ["p_mask"]
+           model_input_names = self.tokenizer.model_input_names + ["p_mask", "token_type_ids"]
 
            for k, v in feature.__dict__.items():
                if k in model_input_names:
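
For context (not part of the patch): a minimal sketch of the NumPy behavior behind #17128. With padding="do_not_pad", a long context that overflows max_seq_len is split into spans of unequal length, so collecting the per-span masks into one array produces a ragged array, which NumPy 1.20+ deprecates unless dtype=object is given. The values below are made up for illustration.

import warnings

import numpy as np

# Two spans of different lengths, as produced when padding="do_not_pad"
# and the context overflows max_seq_len (lengths invented for the demo).
ragged_p_mask = [[1, 1, 0, 0, 1], [1, 1, 0, 1]]

with warnings.catch_warnings():
    warnings.simplefilter("error")  # surface the deprecation warning as an error
    try:
        # NumPy 1.20-1.23 emits VisibleDeprecationWarning here; 1.24+ raises ValueError.
        np.asarray(ragged_p_mask)
    except Exception as err:
        print(f"{type(err).__name__}: {err}")

# The workaround named in the commit subject: a 1-D object array of lists.
as_object = np.asarray(ragged_p_mask, dtype=object)
print(as_object.shape)  # (2,) - one element per span, each a plain list

Note that the final diff goes further than the commit subject suggests: after review feedback it keeps p_mask as plain Python lists and drops return_tensors="np" entirely, so no ragged array is built in the first place.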
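
Also for orientation, a small sketch of what the rewritten p_mask comprehension computes for one span. The sequence_ids values are invented but follow the fast-tokenizer convention: None for special tokens, 0 for the first sequence (the question when question_first is true), 1 for the second (the context).

# Invented sequence_ids for one span: [CLS] question [SEP] context [SEP]
span_sequence_ids = [None, 0, 0, None, 1, 1, 1, None]

question_first = True  # tokenizer pads on the right, so the question is sequence 0
p_mask_row = [tok != 1 if question_first else 0 for tok in span_sequence_ids]
print(p_mask_row)
# [True, True, True, True, False, False, False, True]
# Truthy entries mark tokens that cannot be part of the answer; context tokens
# (sequence id 1) get a falsy value, matching the comment in the source. The
# later loop over cls_indices then re-opens the cls_token by setting it to 0.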
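
Finally, a hedged sketch of how to exercise the changed code path end to end. The checkpoint and context are arbitrary choices, and doc_stride/max_seq_len are the preprocess parameters visible in the hunk headers; before this patch, a run like this surfaced the VisibleDeprecationWarning.

from transformers import pipeline

# Any extractive-QA checkpoint works; this one is an arbitrary example.
qa = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

# A context long enough to overflow max_seq_len yields several spans per
# example, which is the ragged case the patch addresses.
context = "The Eiffel Tower is in Paris. " * 300
result = qa(
    question="Where is the Eiffel Tower?",
    context=context,
    doc_stride=128,
    max_seq_len=384,
)
print(result)  # e.g. {'score': ..., 'start': ..., 'end': ..., 'answer': 'Paris'}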