Fixed some tests when run locally (#1187)

Always tokenize as a batch so that the tokenizer returns `attention_mask`, and pass it to `generate` to avoid the following warning:
```
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
```
openvinotoolkit · Nov 11, 2024 · f9c38cd · f9c38cd
1 parent 790ca60
commit f9c38cd
Showing 1 changed file with 4 additions and 9 deletions.
diff --git a/tests/python_tests/test_generate_api.py b/tests/python_tests/test_generate_api.py
@@ -85,9 +85,10 @@ def run_hf_ov_genai_comparison(model_descr, generation_config: Dict, prompt: str
         generation_config_hf['early_stopping'] = STOP_CRITERIA_MAP[generation_config_hf.pop('stop_criteria')]
     generation_config_hf.pop('ignore_eos', None)
 
-    encoded_prompt = tokenizer.encode(prompt, return_tensors='pt', add_special_tokens=True)
-    hf_encoded_output = model.generate(encoded_prompt, **generation_config_hf)
-    hf_output = tokenizer.decode(hf_encoded_output[0, encoded_prompt.shape[1]:], skip_special_tokens=True)
+    encoded_prompt = tokenizer([prompt], return_tensors='pt', add_special_tokens=True)
+    prompt_ids, attention_mask = encoded_prompt['input_ids'], encoded_prompt['attention_mask']
+    hf_encoded_output = model.generate(prompt_ids, attention_mask=attention_mask, **generation_config_hf)
+    hf_output = tokenizer.decode(hf_encoded_output[0, prompt_ids.shape[1]:], skip_special_tokens=True)
 
     ov_output = pipe.generate(prompt, **config)
     if config.get('num_return_sequences', 1) > 1:
@@ -179,12 +180,6 @@ def test_ov_tensors(model_descr, inputs):
 @pytest.mark.parametrize("prompt", prompts)
 @pytest.mark.precommit
 @pytest.mark.nightly
-@pytest.mark.xfail(
-    raises=TypeError, 
-    reason="pybind was unable to find ov::Tensor from openvino yet",
-    strict=False,
-    condition=sys.platform in ["linux", "win32"]
-)
 def test_genai_tokenizer_encode(model_descr, prompt):
     model_id, path, tokenizer, model, pipe = read_model(model_descr)
     tok = pipe.get_tokenizer()