From 97c50457ead7e6d58b7ac4dee388ef056d321d83 Mon Sep 17 00:00:00 2001
From: I like data
Date: Fri, 20 May 2022 01:09:21 +0530
Subject: [PATCH] Fixes #17128. VisibleDeprecationWarning is addressed by
 specifying dtype=object when creating numpy array.

Update code based on review feedback.
Undo whitespace changes to tokenization_utils_base.py.
---
 src/transformers/pipelines/question_answering.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/transformers/pipelines/question_answering.py b/src/transformers/pipelines/question_answering.py
index d6f23262d2ab20..0f5fbf0370e708 100644
--- a/src/transformers/pipelines/question_answering.py
+++ b/src/transformers/pipelines/question_answering.py
@@ -279,7 +279,6 @@ def preprocess(self, example, padding="do_not_pad", doc_stride=None, max_questio
                 truncation="only_second" if question_first else "only_first",
                 max_length=max_seq_len,
                 stride=doc_stride,
-                return_tensors="np",
                 return_token_type_ids=True,
                 return_overflowing_tokens=True,
                 return_offsets_mapping=True,
@@ -294,12 +293,10 @@ def preprocess(self, example, padding="do_not_pad", doc_stride=None, max_questio
 
             # p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer)
             # We put 0 on the tokens from the context and 1 everywhere else (question and special tokens)
-            p_mask = np.asarray(
-                [
-                    [tok != 1 if question_first else 0 for tok in encoded_inputs.sequence_ids(span_id)]
-                    for span_id in range(num_spans)
-                ]
-            )
+            p_mask = [
+                [tok != 1 if question_first else 0 for tok in encoded_inputs.sequence_ids(span_id)]
+                for span_id in range(num_spans)
+            ]
 
             features = []
             for span_idx in range(num_spans):
@@ -316,8 +313,6 @@ def preprocess(self, example, padding="do_not_pad", doc_stride=None, max_questio
                     for cls_index in cls_indices:
                         p_mask[span_idx][cls_index] = 0
                 submask = p_mask[span_idx]
-                if isinstance(submask, np.ndarray):
-                    submask = submask.tolist()
                 features.append(
                     SquadFeatures(
                         input_ids=input_ids_span_idx,
@@ -344,7 +339,7 @@ def preprocess(self, example, padding="do_not_pad", doc_stride=None, max_questio
         for i, feature in enumerate(features):
            fw_args = {}
            others = {}
-           model_input_names = self.tokenizer.model_input_names + ["p_mask"]
+           model_input_names = self.tokenizer.model_input_names + ["p_mask", "token_type_ids"]
 
            for k, v in feature.__dict__.items():
                if k in model_input_names:
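
For context (not part of the patch): a minimal sketch of the NumPy behavior behind #17128. With padding="do_not_pad", a long context that overflows max_seq_len is split into spans of unequal length, so collecting the per-span masks into one array produces a ragged array, which NumPy 1.20+ deprecates unless dtype=object is given. The values below are made up for illustration.

import warnings

import numpy as np

# Two spans of different lengths, as produced when padding="do_not_pad"
# and the context overflows max_seq_len (lengths invented for the demo).
ragged_p_mask = [[1, 1, 0, 0, 1], [1, 1, 0, 1]]

with warnings.catch_warnings():
    warnings.simplefilter("error")  # surface the deprecation warning as an error
    try:
        # NumPy 1.20-1.23 emits VisibleDeprecationWarning here; 1.24+ raises ValueError.
        np.asarray(ragged_p_mask)
    except Exception as err:
        print(f"{type(err).__name__}: {err}")

# The workaround named in the commit subject: a 1-D object array of lists.
as_object = np.asarray(ragged_p_mask, dtype=object)
print(as_object.shape)  # (2,) - one element per span, each a plain list

Note that the final diff goes further than the commit subject suggests: after review feedback it keeps p_mask as plain Python lists and drops return_tensors="np" entirely, so no ragged array is built in the first place.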
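
Also for orientation, a small sketch of what the rewritten p_mask comprehension computes for one span. The sequence_ids values are invented but follow the fast-tokenizer convention: None for special tokens, 0 for the first sequence (the question when question_first is true), 1 for the second (the context).

# Invented sequence_ids for one span: [CLS] question [SEP] context [SEP]
span_sequence_ids = [None, 0, 0, None, 1, 1, 1, None]

question_first = True  # tokenizer pads on the right, so the question is sequence 0
p_mask_row = [tok != 1 if question_first else 0 for tok in span_sequence_ids]
print(p_mask_row)
# [True, True, True, True, False, False, False, True]
# Truthy entries mark tokens that cannot be part of the answer; context tokens
# (sequence id 1) get a falsy value, matching the comment in the source. The
# later loop over cls_indices then re-opens the cls_token by setting it to 0.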
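
Finally, a hedged sketch of how to exercise the changed code path end to end. The checkpoint and context are arbitrary choices, and doc_stride/max_seq_len are the preprocess parameters visible in the hunk headers; before this patch, a run like this surfaced the VisibleDeprecationWarning.

from transformers import pipeline

# Any extractive-QA checkpoint works; this one is an arbitrary example.
qa = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

# A context long enough to overflow max_seq_len yields several spans per
# example, which is the ragged case the patch addresses.
context = "The Eiffel Tower is in Paris. " * 300
result = qa(
    question="Where is the Eiffel Tower?",
    context=context,
    doc_stride=128,
    max_seq_len=384,
)
print(result)  # e.g. {'score': ..., 'start': ..., 'end': ..., 'answer': 'Paris'}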