diff --git a/README.md b/README.md
index 300debaf0..faf016bbc 100644
--- a/README.md
+++ b/README.md
@@ -380,6 +380,28 @@ results = mteb.load_results(models=models, tasks=tasks)
df = results_to_dataframe(results)
```
+
+<details>
+  <summary> Annotate Contamination in the training data of a model </summary>
+
+### Annotate Contamination
+
+Have you found contamination in the training data of a model? Please let us know, either by opening an issue or, ideally, by submitting a PR
+annotating the training datasets of the model:
+
+```py
+model_w_contamination = ModelMeta(
+    name="model-with-contamination",
+    ...
+    training_datasets={"ArguAna":  # name of the dataset within MTEB
+                       ["test"]},  # the splits that have been trained on
+    ...
+)
+```
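+
+To check an existing annotation, the `training_datasets` field can be read back from the model's metadata. A minimal sketch (the model name is only a placeholder for any model registered in `mteb`; models without an annotation simply return `None`):
+
+```py
+import mteb
+
+# look up the registered metadata and inspect the annotated training data
+meta = mteb.get_model_meta("intfloat/multilingual-e5-small")
+print(meta.training_datasets)  # e.g. {"ArguAna": ["test"]}, or None if not annotated
+```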
+
+</details>
diff --git a/mteb/evaluation/evaluators/RetrievalEvaluator.py b/mteb/evaluation/evaluators/RetrievalEvaluator.py
index 8dcac9ab0..3dca66b0f 100644
--- a/mteb/evaluation/evaluators/RetrievalEvaluator.py
+++ b/mteb/evaluation/evaluators/RetrievalEvaluator.py
@@ -83,10 +83,10 @@ def __call__(
corpus, queries, self.top_k, instructions=instructions, **kwargs
)
elif (
- hasattr(self.retriever.model, "mteb_model_meta")
- and self.retriever.model.mteb_model_meta.name == "bm25s"
+ hasattr(self.retriever.model.model, "mteb_model_meta")
+ and self.retriever.model.model.mteb_model_meta.name == "bm25s"
):
- return self.retriever.model.search(
+ return self.retriever.model.model.search(
corpus,
queries,
self.top_k,
diff --git a/mteb/model_meta.py b/mteb/model_meta.py
index 68deb02f2..38b77432e 100644
--- a/mteb/model_meta.py
+++ b/mteb/model_meta.py
@@ -76,6 +76,9 @@ class ModelMeta(BaseModel):
zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models
are evaluated non-zero-shot unless specified otherwise.
citation: The citation for the model. This is a bibtex string.
+    training_datasets: A dictionary of the datasets that the model was trained on. Names should be the names as they appear in `mteb`, for example
+        {"ArguAna": ["test"]} if the model was trained on the ArguAna test set. This field is used to determine whether a model generalizes zero-shot to
+        a benchmark, as well as to mark dataset contamination.
adapted_from: Name of the model from which this model is adapted from. For quantizations, fine-tunes, long doc extensions, etc.
superseded_by: Name of the model that supersedes this model, e.g. nvidia/NV-Embed-v2 supersedes v1.
"""
@@ -99,7 +102,7 @@ class ModelMeta(BaseModel):
reference: STR_URL | None = None
similarity_fn_name: DISTANCE_METRICS | None = None
use_instructions: bool | None = None
- zero_shot_benchmarks: list[str] | None = None
+ training_datasets: dict[str, list[str]] | None = None
adapted_from: str | None = None
superseded_by: str | None = None
citation: str | None = None
diff --git a/mteb/models/bm25.py b/mteb/models/bm25.py
index 1848b9e4e..7d1161cdd 100644
--- a/mteb/models/bm25.py
+++ b/mteb/models/bm25.py
@@ -17,7 +17,7 @@ def bm25_loader(**kwargs):
import Stemmer
except ImportError:
raise ImportError(
- "bm25s or Stemmer is not installed. Please install it with `pip install bm25s Stemmer`."
+ "bm25s or Stemmer is not installed. Please install it with `pip install bm25s PyStemmer`."
)
class BM25Search(DRESModel, Wrapper):
@@ -58,7 +58,17 @@ def search(
) -> dict[str, dict[str, float]]:
logger.info("Encoding Corpus...")
corpus_ids = list(corpus.keys())
- corpus_with_ids = [{"doc_id": cid, **corpus[cid]} for cid in corpus_ids]
+ corpus_with_ids = [
+ {
+ "doc_id": cid,
+ **(
+ {"text": corpus[cid]}
+ if isinstance(corpus[cid], str)
+ else corpus[cid]
+ ),
+ }
+ for cid in corpus_ids
+ ]
corpus_texts = [
"\n".join([doc.get("title", ""), doc["text"]])
diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py
index 2ed0b76a9..3f07a0d23 100644
--- a/mteb/models/cohere_models.py
+++ b/mteb/models/cohere_models.py
@@ -5,6 +5,7 @@
import numpy as np
import torch
+import tqdm
from mteb.encoder_interface import PromptType
from mteb.model_meta import ModelMeta
@@ -140,25 +141,43 @@ def __init__(
)
def _embed(
- self, sentences: list[str], cohere_task_type: str, retries: int = 5
+ self,
+ sentences: list[str],
+ cohere_task_type: str,
+ show_progress_bar: bool = False,
+ retries: int = 5,
) -> torch.Tensor:
import cohere # type: ignore
+ max_batch_size = 256
+
+ batches = [
+ sentences[i : i + max_batch_size]
+ for i in range(0, len(sentences), max_batch_size)
+ ]
+
client = cohere.Client()
- while retries > 0: # Cohere's API is not always reliable
- try:
- response = client.embed(
- texts=list(sentences),
- model=self.model_name,
- input_type=cohere_task_type,
- )
- break
- except Exception as e:
- print(f"Retrying... {retries} retries left.")
- retries -= 1
- if retries == 0:
- raise e
- return torch.tensor(response.embeddings)
+
+ all_embeddings = []
+
+ for batch in tqdm.tqdm(batches, leave=False, disable=not show_progress_bar):
+ while retries > 0: # Cohere's API is not always reliable
+ try:
+ response = client.embed(
+ texts=batch,
+ model=self.model_name,
+ input_type=cohere_task_type,
+ )
+ break
+ except Exception as e:
+ print(f"Retrying... {retries} retries left.")
+ retries -= 1
+ if retries == 0:
+ raise e
+
+ all_embeddings.extend(torch.tensor(response.embeddings).numpy())
+
+ return np.array(all_embeddings)
def encode(
self,
@@ -168,13 +187,24 @@ def encode(
prompt_type: PromptType | None = None,
**kwargs: Any,
) -> np.ndarray:
- cohere_task_type = self.get_prompt_name(
- self.model_prompts, task_name, prompt_type
- )
+ prompt_name = self.get_prompt_name(self.model_prompts, task_name, prompt_type)
+ cohere_task_type = self.model_prompts.get(prompt_name)
+
if cohere_task_type is None:
# search_document is recommended if unknown (https://cohere.com/blog/introducing-embed-v3)
cohere_task_type = "search_document"
- return self._embed(sentences, cohere_task_type=cohere_task_type).numpy()
+
+ show_progress_bar = (
+ False
+ if "show_progress_bar" not in kwargs
+ else kwargs.pop("show_progress_bar")
+ )
+
+ return self._embed(
+ sentences,
+ cohere_task_type=cohere_task_type,
+ show_progress_bar=show_progress_bar,
+ )
model_prompts = {
diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py
index ca2b32b2a..adf96fbe4 100644
--- a/mteb/models/openai_models.py
+++ b/mteb/models/openai_models.py
@@ -5,6 +5,7 @@
from typing import Any
import numpy as np
+import tqdm
from mteb.model_meta import ModelMeta
from mteb.requires_package import requires_package
@@ -68,9 +69,15 @@ def encode(self, sentences: list[str], **kwargs: Any) -> np.ndarray:
for i in range(0, len(trimmed_sentences), max_batch_size)
]
+ show_progress_bar = (
+ False
+ if "show_progress_bar" not in kwargs
+ else kwargs.pop("show_progress_bar")
+ )
+
all_embeddings = []
- for sublist in sublists:
+ for sublist in tqdm.tqdm(sublists, leave=False, disable=not show_progress_bar):
try:
response = self._client.embeddings.create(
input=sublist,
diff --git a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py
index c22d15afc..6f7d188b7 100644
--- a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py
+++ b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py
@@ -43,6 +43,9 @@ def _load_publichealthqa_data(
answer_ids = {answer: _id for _id, answer in enumerate(set(data["answer"]))}
for row in data:
+ if row["question"] is None or row["answer"] is None:
+ # There are some questions and answers that are None in the original dataset, specifically in the Arabic subset.
+ continue
question = row["question"]
answer = row["answer"]
query_id = f"Q{question_ids[question]}"
diff --git a/pyproject.toml b/pyproject.toml
index 70e6bd5c9..90abd3575 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "mteb"
-version = "1.22.0"
+version = "1.23.0"
description = "Massive Text Embedding Benchmark"
readme = "README.md"
authors = [