From 80bb95fe90f3aa24cd1d0a1d8f75d866691be7a6 Mon Sep 17 00:00:00 2001 From: yjoonjang Date: Wed, 4 Dec 2024 18:24:45 +0900 Subject: [PATCH] fix: add sleep for too many requests --- mteb/models/openai_models.py | 62 +++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index 211e3ce74..36188f7ed 100644 --- a/mteb/models/openai_models.py +++ b/mteb/models/openai_models.py @@ -19,7 +19,7 @@ def __init__( self, model_name: str, max_tokens: int, - tokenizer_name: str = "cl100k_base", # since all models use this tokenizer now + tokenizer_name: str = "cl100k_base", # since all models use this tokenizer now embed_dim: int | None = None, **kwargs, ) -> None: @@ -28,17 +28,23 @@ def __init__( """ requires_package(self, "openai", "Openai text embedding") from openai import OpenAI + requires_package(self, "tiktoken", "Tiktoken package") + import tiktoken self._client = OpenAI() self._model_name = model_name self._embed_dim = embed_dim self._max_tokens = max_tokens - self._tokenizer_name = tokenizer_name + self._encoding = tiktoken.get_encoding(tokenizer_name) + + def truncate_text_tokens(self, text): + """Truncate a string to have `max_tokens` according to the given encoding.""" + truncated_sentence = self._encoding.encode(text)[:self._max_tokens] + return self._encoding.decode(truncated_sentence) def encode(self, sentences: list[str], **kwargs: Any) -> np.ndarray: requires_package(self, "openai", "Openai text embedding") - requires_package(self, "tiktoken", "Tiktoken package") - import tiktoken + from openai import NotGiven if self._model_name == "text-embedding-ada-002" and self._embed_dim is not None: @@ -48,11 +54,10 @@ def encode(self, sentences: list[str], **kwargs: Any) -> np.ndarray: trimmed_sentences = [] for sentence in sentences: - encoding = tiktoken.get_encoding(self._tokenizer_name) - encoded_sentence = encoding.encode(sentence) + encoded_sentence = 
self._encoding.encode(sentence)
             if len(encoded_sentence) > self._max_tokens:
-                trimmed_sentence = encoding.decode(encoded_sentence[: self._max_tokens])
-                trimmed_sentences.append(trimmed_sentence)
+                truncated_sentence = self.truncate_text_tokens(sentence)
+                trimmed_sentences.append(truncated_sentence)
             else:
                 trimmed_sentences.append(sentence)
 
@@ -65,12 +70,34 @@ def encode(self, sentences: list[str], **kwargs: Any) -> np.ndarray:
         all_embeddings = []
 
         for sublist in sublists:
-            response = self._client.embeddings.create(
-                input=sublist,
-                model=self._model_name,
-                encoding_format="float",
-                dimensions=self._embed_dim or NotGiven(),
-            )
+            try:
+                response = self._client.embeddings.create(
+                    input=sublist,
+                    model=self._model_name,
+                    encoding_format="float",
+                    dimensions=self._embed_dim or NotGiven(),
+                )
+            except Exception as e:
+                # Sleep due to too many requests
+                logger.info("Sleeping for 10 seconds due to error: %s", e)
+                import time
+                time.sleep(10)
+                try:
+                    response = self._client.embeddings.create(
+                        input=sublist,
+                        model=self._model_name,
+                        encoding_format="float",
+                        dimensions=self._embed_dim or NotGiven(),
+                    )
+                except Exception as e:
+                    logger.info("Sleeping for 60 seconds due to error: %s", e)
+                    time.sleep(60)
+                    response = self._client.embeddings.create(
+                        input=sublist,
+                        model=self._model_name,
+                        encoding_format="float",
+                        dimensions=self._embed_dim or NotGiven(),
+                    )
             all_embeddings.extend(self._to_numpy(response))
 
         return np.array(all_embeddings)
@@ -126,7 +153,12 @@ def _to_numpy(self, embedding_response) -> np.ndarray:
     revision="2",
     release_date="2022-12-15",
     languages=None,  # supported languages not specified
-    loader=partial(OpenAIWrapper, model_name="text-embedding-ada-002"),
+    loader=partial(
+        OpenAIWrapper,
+        model_name="text-embedding-ada-002",
+        tokenizer_name="cl100k_base",
+        max_tokens=8191,
+    ),
     reference="https://openai.com/index/new-and-improved-embedding-model/",
     max_tokens=8191,
     embed_dim=1536,