From 1e505570549a78740e53ae8337c7b2dfce555f7d Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 6 Sep 2024 16:20:57 +0100 Subject: [PATCH 01/73] wip: start adding BLIP models --- mteb/models/__init__.py | 2 + mteb/models/blip_models.py | 183 +++++++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 mteb/models/blip_models.py diff --git a/mteb/models/__init__.py b/mteb/models/__init__.py index 8e96542925..94358143c1 100644 --- a/mteb/models/__init__.py +++ b/mteb/models/__init__.py @@ -10,6 +10,7 @@ from mteb.models import ( align_models, bge_models, + blip_models, bm25, clip_models, cohere_models, @@ -130,6 +131,7 @@ def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMe model_modules = [ align_models, bge_models, + blip_models, bm25, cohere_models, dino_models, diff --git a/mteb/models/blip_models.py b/mteb/models/blip_models.py new file mode 100644 index 0000000000..89b7f7d204 --- /dev/null +++ b/mteb/models/blip_models.py @@ -0,0 +1,183 @@ +from __future__ import annotations + +from functools import partial +from typing import Any + +import torch +from PIL import Image +from torch.utils.data import DataLoader +from tqdm import tqdm +from transformers import AutoModel, AutoProcessor + +from mteb.model_meta import ModelMeta + + +class BLIPModelWrapper: + def __init__( + self, + model_name: str, + device: str = "cuda" if torch.cuda.is_available() else "cpu", + **kwargs: Any, + ): + self.model_name = model_name + self.device = device + self.model = AutoModel.from_pretrained(model_name).to(self.device) + self.processor = AutoProcessor.from_pretrained(model_name) + + def preprocess( + self, + texts: list[str], + images: list[Image.Image], + ): + return self.processor( + text=texts, images=images, return_tensors="pt", padding=True + ) + + def get_text_embeddings(self, texts: list[str], batch_size: int = 32): + all_text_embeddings = [] + + with torch.no_grad(): + for i in tqdm(range(0, len(texts), 
batch_size)): + batch_texts = texts[i : i + batch_size] + inputs = self.processor( + text=batch_texts, return_tensors="pt", padding=True, truncation=True + ) + inputs = {k: v.to(self.device) for k, v in inputs.items()} + text_outputs = self.model.get_text_features(**inputs) + all_text_embeddings.append(text_outputs.cpu()) + + all_text_embeddings = torch.cat(all_text_embeddings, dim=0) + return all_text_embeddings + + def get_image_embeddings( + self, images: list[Image.Image] | DataLoader, batch_size: int = 32 + ): + all_image_embeddings = [] + + if isinstance(images, DataLoader): + with torch.no_grad(): + for batch in tqdm(images): + inputs = self.processor( + images=batch, return_tensors="pt", padding=True + ) + inputs = {k: v.to(self.device) for k, v in inputs.items()} + image_outputs = self.model.get_image_features(**inputs) + all_image_embeddings.append(image_outputs.cpu()) + else: + with torch.no_grad(): + for i in tqdm(range(0, len(images), batch_size)): + batch_images = images[i : i + batch_size] + inputs = self.processor( + images=batch_images, return_tensors="pt", padding=True + ) + inputs = {k: v.to(self.device) for k, v in inputs.items()} + image_outputs = self.model.get_image_features(**inputs) + all_image_embeddings.append(image_outputs.cpu()) + + all_image_embeddings = torch.cat(all_image_embeddings, dim=0) + return all_image_embeddings + + def calculate_probs(self, text_embeddings, image_embeddings): + text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True) + image_embeddings = image_embeddings / image_embeddings.norm( + dim=-1, keepdim=True + ) + logits = torch.matmul(image_embeddings, text_embeddings.T) + probs = (logits * 100).softmax(dim=-1) + return probs + + def get_fused_embeddings( + self, + texts: list[str] = None, + images: list[Image.Image] | DataLoader = None, + fusion_mode="sum", + batch_size: int = 32, + ): + # TODO: find out if BLIP has a prescribed way of fusing text and image embeddings + if texts is None and 
images is None: + raise ValueError("Either texts or images must be provided") + + text_embeddings = None + image_embeddings = None + + if texts is not None: + text_embeddings = self.get_text_embeddings(texts, batch_size) + + if images is not None: + image_embeddings = self.get_image_embeddings(images, batch_size) + + if text_embeddings is not None and image_embeddings is not None: + if len(text_embeddings) != len(image_embeddings): + raise ValueError( + "The number of texts and images must have the same length" + ) + if fusion_mode == "sum": + fused_embeddings = text_embeddings + image_embeddings + else: + # to do: add other fusion mode + raise ValueError(f"fusion mode {fusion_mode} hasn't been implemented") + return fused_embeddings + elif text_embeddings is not None: + return text_embeddings + elif image_embeddings is not None: + return image_embeddings + + +""" +TODO: implement all model variants + +Salesforce/blip-image-captioning-large +Image-to-Text • Updated Dec 7, 2023 • +1.16M • +• +1.04k +Salesforce/blip-image-captioning-base +Image-to-Text • Updated Aug 1, 2023 • +857k • +• +475 +Salesforce/blip-vqa-base +Visual Question Answering • Updated Dec 7, 2023 • +168k • +119 +Salesforce/blip-vqa-capfilt-large +Visual Question Answering • Updated Jan 22 • +90.6k • +44 +Salesforce/blip-itm-base-coco +Updated Aug 1, 2023 • +12.8k • +16 +Salesforce/blip-itm-large-coco +Updated Aug 1, 2023 • +9.9k +Salesforce/blip-itm-base-flickr +Updated Aug 1, 2023 • +65 +Salesforce/blip-itm-large-flickr +Updated Aug 1, 2023 • +459 • +2 +""" + +blip_image_captioning_base = ModelMeta( + loader=partial( + BLIPModelWrapper, + model_name="Salesforce/blip-image-captioning-base", + ), + name="Salesforce/blip-image-captioning-base", + languages=["eng_Latn"], + open_source=True, + revision="89b09ea1789f7addf2f6d6f0dfc4ce10ab58ef84", + release_date="2023-08-01", +) + + +if __name__ == "__main__": + import mteb + + mdl = mteb.get_model( + blip_image_captioning_base.name, 
blip_image_captioning_base.revision + ) + emb = mdl.get_text_embeddings(["Hello, world!"]) + print(emb.shape) From 8f8e05cb3e3f1d4a773e3e6a1136d7d307c872d9 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 9 Sep 2024 16:17:43 +0100 Subject: [PATCH 02/73] add other blip variants --- mteb/models/blip_models.py | 85 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/mteb/models/blip_models.py b/mteb/models/blip_models.py index 89b7f7d204..ead46b63e7 100644 --- a/mteb/models/blip_models.py +++ b/mteb/models/blip_models.py @@ -159,6 +159,18 @@ def get_fused_embeddings( 459 • 2 """ +# in descending order of usage (downloads from huggingface) +blip_image_captioning_large = ModelMeta( + loader=partial( + BLIPModelWrapper, + model_name="Salesforce/blip-image-captioning-large", + ), + name="Salesforce/blip-image-captioning-large", + languages=["eng_Latn"], + open_source=True, + revision="2227ac38c9f16105cb0412e7cab4759978a8fd90", + release_date="2023-12-07", +) blip_image_captioning_base = ModelMeta( loader=partial( @@ -173,6 +185,79 @@ def get_fused_embeddings( ) +blip_vqa_base = ModelMeta( + loader=partial( + BLIPModelWrapper, + model_name="Salesforce/blip-vqa-base", + ), + name="Salesforce/blip-vqa-base", + languages=["eng_Latn"], + open_source=True, + revision="c7df8e7cd7aa2ee9af18f56e2b29e59a92651b64", + release_date="2023-12-07", +) + +blip_vqa_capfilt_large = ModelMeta( + loader=partial( + BLIPModelWrapper, + model_name="Salesforce/blip-vqa-capfilt-large", + ), + name="Salesforce/blip-vqa-capfilt-large", + languages=["eng_Latn"], + open_source=True, + revision="e53f95265aeab69013fabb5380500ab984adbbb4", + release_date="2023-01-22", +) + +blip_itm_base_coco = ModelMeta( + loader=partial( + BLIPModelWrapper, + model_name="Salesforce/blip-itm-base-coco", + ), + name="Salesforce/blip-itm-base-coco", + languages=["eng_Latn"], + open_source=True, + revision="7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f", + release_date="2023-08-01", +) + 
+blip_itm_large_coco = ModelMeta( + loader=partial( + BLIPModelWrapper, + model_name="Salesforce/blip-itm-large-coco", + ), + name="Salesforce/blip-itm-large-coco", + languages=["eng_Latn"], + open_source=True, + revision="fef05cafc05298067cbbca00b125749394a77a6f", + release_date="2023-08-01", +) + +blip_itm_base_flickr = ModelMeta( + loader=partial( + BLIPModelWrapper, + model_name="Salesforce/blip-itm-base-flickr", + ), + name="Salesforce/blip-itm-base-flickr", + languages=["eng_Latn"], + open_source=True, + revision="1de29e660d91ae1786c1876212ea805a22eab251", + release_date="2023-08-01", +) + +blip_itm_large_flickr = ModelMeta( + loader=partial( + BLIPModelWrapper, + model_name="Salesforce/blip-itm-large-flickr", + ), + name="Salesforce/blip-itm-large-flickr", + languages=["eng_Latn"], + open_source=True, + revision="bda12e6506758f54261b5ab174b2c55a3ba143fb", + release_date="2023-08-01", +) + + if __name__ == "__main__": import mteb From be8b4bbd007e274cc622c7291a24b2cb23c080c8 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Wed, 11 Sep 2024 15:50:31 +0100 Subject: [PATCH 03/73] wip: add blip2_models.py --- mteb/models/blip2_models.py | 235 ++++++++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 mteb/models/blip2_models.py diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py new file mode 100644 index 0000000000..5db3d01c37 --- /dev/null +++ b/mteb/models/blip2_models.py @@ -0,0 +1,235 @@ +from __future__ import annotations + +from functools import partial +from typing import Any + +import torch +from torch.nn.functional import normalize +from PIL import Image +from torch.utils.data import DataLoader +from tqdm import tqdm +from transformers import BlipForImageTextRetrieval, BlipProcessor + +from mteb.model_meta import ModelMeta + + +class BLIP2ModelWrapper: + def __init__( + self, + model_name: str, + device: str = "cuda" if torch.cuda.is_available() else "cpu", + **kwargs: Any, + ): + self.model_name = 
model_name + self.device = device + self.model = BlipForImageTextRetrieval.from_pretrained(model_name).to(self.device) + self.processor = BlipProcessor.from_pretrained(model_name) + + def preprocess( + self, + texts: list[str], + images: list[Image.Image], + ): + return self.processor( + text=texts, images=images, return_tensors="pt", padding=True + ) + + def get_text_embeddings(self, texts: list[str], batch_size: int = 32): + all_text_embeddings = [] + + with torch.no_grad(): + for i in tqdm(range(0, len(texts), batch_size)): + batch_texts = texts[i : i + batch_size] + inputs = self.processor( + text=batch_texts, return_tensors="pt", padding=True, truncation=True + ) + inputs = {k: v.to(self.device) for k, v in inputs.items()} + # different to CLIPModelWrapper: text_encoder instead of get_text_features and apply projection and normalization + text_outputs = self.model.text_encoder(**inputs) + text_outputs = text_outputs[0] + text_outputs = normalize(self.model.text_proj(text_outputs[:,0,:]), dim=-1) + all_text_embeddings.append(text_outputs.cpu()) + + all_text_embeddings = torch.cat(all_text_embeddings, dim=0) + return all_text_embeddings + + def get_image_embeddings( + self, images: list[Image.Image] | DataLoader, batch_size: int = 32 + ): + all_image_embeddings = [] + + if isinstance(images, DataLoader): + with torch.no_grad(): + for batch in tqdm(images): + inputs = self.processor( + images=batch, return_tensors="pt", padding=True + ) + inputs = {k: v.to(self.device) for k, v in inputs.items()} + image_outputs = self.model.vision_model(**inputs) + image_outputs = image_outputs[0] + image_outputs = normalize(self.model.vision_proj(image_outputs[:,0,:]), dim=-1) + all_image_embeddings.append(image_outputs.cpu()) + else: + with torch.no_grad(): + for i in tqdm(range(0, len(images), batch_size)): + batch_images = images[i : i + batch_size] + inputs = self.processor( + images=batch_images, return_tensors="pt", padding=True + ) + inputs = {k: v.to(self.device) for k, 
v in inputs.items()} + image_outputs = self.model.get_image_features(**inputs) + image_outputs = self.model.vision_model(**inputs) + image_outputs = image_outputs[0] + image_outputs = normalize(self.model.vision_proj(image_outputs[:,0,:]), dim=-1) + all_image_embeddings.append(image_outputs.cpu()) + + all_image_embeddings = torch.cat(all_image_embeddings, dim=0) + return all_image_embeddings + + def calculate_probs(self, text_embeddings, image_embeddings): + text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True) + image_embeddings = image_embeddings / image_embeddings.norm( + dim=-1, keepdim=True + ) + logits = torch.matmul(image_embeddings, text_embeddings.T) + probs = (logits * 100).softmax(dim=-1) + return probs + + def get_fused_embeddings( + self, + texts: list[str] = None, + images: list[Image.Image] | DataLoader = None, + fusion_mode="sum", + batch_size: int = 32, + ): + # TODO: find out if BLIP has a prescribed way of fusing text and image embeddings + if texts is None and images is None: + raise ValueError("Either texts or images must be provided") + + text_embeddings = None + image_embeddings = None + + if texts is not None: + text_embeddings = self.get_text_embeddings(texts, batch_size) + + if images is not None: + image_embeddings = self.get_image_embeddings(images, batch_size) + + if text_embeddings is not None and image_embeddings is not None: + if len(text_embeddings) != len(image_embeddings): + raise ValueError( + "The number of texts and images must have the same length" + ) + if fusion_mode == "sum": + fused_embeddings = text_embeddings + image_embeddings + else: + # to do: add other fusion mode + raise ValueError(f"fusion mode {fusion_mode} hasn't been implemented") + return fused_embeddings + elif text_embeddings is not None: + return text_embeddings + elif image_embeddings is not None: + return image_embeddings + + +""" + +Salesforce/blip2-opt-2.7b +Image-to-Text • Updated Mar 22 • +588k • +296 
+Salesforce/blip2-flan-t5-xxl +Image-to-Text • Updated Mar 29 • +9.23k • +84 +Salesforce/blip2-opt-6.7b-coco +Image-to-Text • Updated Mar 31 • +1.51k • +28 +Salesforce/blip2-opt-6.7b +Image-to-Text • Updated Mar 27 • +4.93k • +71 +Salesforce/blip2-flan-t5-xl +Image-to-Text • Updated Dec 13, 2023 • +95.9k • +56 +""" +# in descending order of usage (downloads from huggingface) + +blip2_opt_2_7b = ModelMeta( + loader=partial( + BLIP2ModelWrapper, + model_name="Salesforce/blip2-opt-2.7b", + ), + name="Salesforce/blip2-opt-2.7b", + languages=["eng_Latn"], + open_source=True, + revision="51572668da0eb669e01a189dc22abe6088589a24", + release_date="2024-03-22", +) + +blip2_flan_t5_xxl = ModelMeta( + loader=partial( + BLIP2ModelWrapper, + model_name="Salesforce/blip2-flan-t5-xxl", + ), + name="Salesforce/blip2-flan-t5-xxl", + languages=["eng_Latn"], + open_source=True, + revision="43206cbc865b9d5b3dd7d080e5d94b4143ca8e74", + release_date="2024-03-29", +) + +blip2_opt_6_7b_coco = ModelMeta( + loader=partial( + BLIP2ModelWrapper, + model_name="Salesforce/blip2-opt-6.7b-coco", + ), + name="Salesforce/blip2-opt-6.7b-coco", + languages=["eng_Latn"], + open_source=True, + revision="0d580de59320a25a4d2c386387bcef310d5f286e", + release_date="2024-03-31", +) + +blip2_opt_6_7b = ModelMeta( + loader=partial( + BLIP2ModelWrapper, + model_name="Salesforce/blip2-opt-6.7b", + ), + name="Salesforce/blip2-opt-6.7b", + languages=["eng_Latn"], + open_source=True, + revision="1d33d60155fd1323b97556e0f1dd5148a9749f5b", + release_date="2024-03-27", +) + +blip2_flan_t5_xl = ModelMeta( + loader=partial( + BLIP2ModelWrapper, + model_name="Salesforce/blip2-flan-t5-xl", + ), + name="Salesforce/blip2-flan-t5-xl", + languages=["eng_Latn"], + open_source=True, + revision="e5025a34e3e769e72e2aab7f7bfd00bc84d5fd77", + release_date="2023-12-13", +) + +if __name__ == "__main__": + import mteb + + mdl = mteb.get_model( + blip2_opt_2_7b.name, blip2_opt_2_7b.revision + ) + emb = mdl.get_text_embeddings(["Hello, 
world!"]) + emb2 = mdl.get_text_embeddings(["Hello there, world!"]) + emb3 = mdl.get_text_embeddings(["Goodbye, person!"]) + + sim = torch.nn.functional.cosine_similarity(emb, emb2) + print(sim) + + sim = torch.nn.functional.cosine_similarity(emb, emb3) + print(sim) + From b57a395d5e103d0677c4547ebbb8f9f35564a202 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Wed, 11 Sep 2024 16:26:45 +0100 Subject: [PATCH 04/73] make lint --- .../abstasks/Image/AbsTaskAny2AnyRetrieval.py | 2 +- .../Image/AbsTaskImageClassification.py | 2 +- mteb/abstasks/Image/AbsTaskImageClustering.py | 2 +- .../AbsTaskImageMultilabelClassification.py | 2 +- .../AbsTaskImageTextPairClassification.py | 2 +- .../Image/AbsTaskZeroshotClassification.py | 2 +- mteb/models/blip2_models.py | 25 +++--- mteb/models/blip_models.py | 77 ++++++++----------- mteb/models/instructions.py | 2 - mteb/models/ru_sentence_models.py | 2 - mteb/models/sentence_transformers_models.py | 2 - .../Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py | 3 +- .../eng/FashionIQIT2IRetrieval.py | 3 +- .../eng/HatefulMemesI2TRetrieval.py | 3 +- .../eng/HatefulMemesT2IRetrieval.py | 3 +- .../eng/InfoSeekIT2ITRetrieval.py | 3 +- .../eng/InfoSeekIT2TRetrieval.py | 3 +- .../eng/MemotionI2TRetrieval.py | 3 +- .../eng/MemotionT2IRetrieval.py | 3 +- .../eng/NIGHTSI2IRetrieval.py | 3 +- .../eng/OVENIT2ITRetrieval.py | 3 +- .../Any2AnyRetrieval/eng/OVENIT2TRetrieval.py | 3 +- .../eng/SciMMIRI2TRetrieval.py | 3 +- .../eng/SciMMIRT2IRetrieval.py | 3 +- .../eng/TUBerlinT2IRetrieval.py | 3 +- .../eng/VisualNewsI2TRetrieval.py | 3 +- .../eng/VisualNewsT2IRetrieval.py | 3 +- .../eng/WebQAT2ITRetrieval.py | 3 +- .../Any2AnyRetrieval/eng/WebQAT2TRetrieval.py | 3 +- .../multilingual/WITT2IRetrieval.py | 3 +- .../multilingual/XFlickr30kCoT2IRetrieval.py | 3 +- .../multilingual/XM3600T2IRetrieval.py | 3 +- mteb/tasks/Image/Clustering/eng/CIFAR.py | 3 +- .../eng/BirdsnapClassification.py | 3 +- .../Image/ImageClassification/eng/CIFAR.py | 3 +- 
.../eng/Caltech101Classification.py | 3 +- .../eng/DTDClassification.py | 3 +- .../eng/EuroSATClassification.py | 3 +- .../eng/FER2013Classification.py | 3 +- .../eng/FGVCAircraftClassification.py | 3 +- .../eng/Food101Classification.py | 3 +- .../eng/MNISTClassification.py | 3 +- .../eng/OxfordFlowersClassification.py | 3 +- .../eng/OxfordPetsClassification.py | 3 +- .../eng/RESISC45Classification.py | 3 +- .../eng/STL10Classification.py | 3 +- .../eng/SUN397Classification.py | 3 +- .../eng/StanfordCarsClassification.py | 3 +- .../ZeroshotClassification/eng/Birdsnap.py | 3 +- .../Image/ZeroshotClassification/eng/CIFAR.py | 3 +- .../ZeroshotClassification/eng/Caltech101.py | 3 +- .../Image/ZeroshotClassification/eng/DTD.py | 3 +- .../ZeroshotClassification/eng/EuroSAT.py | 3 +- .../ZeroshotClassification/eng/FER2013.py | 3 +- .../eng/FGVCAircraft.py | 3 +- .../ZeroshotClassification/eng/Food101.py | 3 +- .../Image/ZeroshotClassification/eng/MNIST.py | 3 +- .../ZeroshotClassification/eng/OxfordPets.py | 3 +- .../ZeroshotClassification/eng/RESISC45.py | 3 +- .../Image/ZeroshotClassification/eng/STL10.py | 3 +- .../ZeroshotClassification/eng/SUN397.py | 3 +- .../eng/StanfordCars.py | 3 +- 62 files changed, 103 insertions(+), 170 deletions(-) diff --git a/mteb/abstasks/Image/AbsTaskAny2AnyRetrieval.py b/mteb/abstasks/Image/AbsTaskAny2AnyRetrieval.py index 9c5987f4b1..c640988e91 100644 --- a/mteb/abstasks/Image/AbsTaskAny2AnyRetrieval.py +++ b/mteb/abstasks/Image/AbsTaskAny2AnyRetrieval.py @@ -12,9 +12,9 @@ from datasets import Features, Value, load_dataset from PIL import Image -from ..AbsTask import AbsTask from ...evaluation.evaluators import Any2AnyRetrievalEvaluator from ...load_results.mteb_results import ScoresDict +from ..AbsTask import AbsTask logger = logging.getLogger(__name__) diff --git a/mteb/abstasks/Image/AbsTaskImageClassification.py b/mteb/abstasks/Image/AbsTaskImageClassification.py index 3a95f2bd29..715f007e10 100644 --- 
a/mteb/abstasks/Image/AbsTaskImageClassification.py +++ b/mteb/abstasks/Image/AbsTaskImageClassification.py @@ -6,7 +6,6 @@ import numpy as np -from ..AbsTask import AbsTask from ...encoder_interface import Encoder from ...evaluation.evaluators import ( ImagekNNClassificationEvaluator, @@ -14,6 +13,7 @@ ImagelogRegClassificationEvaluator, ) from ...load_results.mteb_results import HFSubset, ScoresDict +from ..AbsTask import AbsTask logger = logging.getLogger(__name__) diff --git a/mteb/abstasks/Image/AbsTaskImageClustering.py b/mteb/abstasks/Image/AbsTaskImageClustering.py index 5370b16b15..3d6f7e88d2 100644 --- a/mteb/abstasks/Image/AbsTaskImageClustering.py +++ b/mteb/abstasks/Image/AbsTaskImageClustering.py @@ -5,10 +5,10 @@ from datasets import Dataset -from ..AbsTask import AbsTask from ...encoder_interface import Encoder, EncoderWithQueryCorpusEncode from ...evaluation.evaluators import ImageClusteringEvaluator from ...load_results.mteb_results import HFSubset, ScoresDict +from ..AbsTask import AbsTask logger = logging.getLogger(__name__) diff --git a/mteb/abstasks/Image/AbsTaskImageMultilabelClassification.py b/mteb/abstasks/Image/AbsTaskImageMultilabelClassification.py index 5669575a18..6a0d649f10 100644 --- a/mteb/abstasks/Image/AbsTaskImageMultilabelClassification.py +++ b/mteb/abstasks/Image/AbsTaskImageMultilabelClassification.py @@ -12,9 +12,9 @@ from sklearn.neighbors import KNeighborsClassifier from sklearn.preprocessing import MultiLabelBinarizer -from ..AbsTask import AbsTask from ...encoder_interface import Encoder from ...load_results.mteb_results import HFSubset, ScoresDict +from ..AbsTask import AbsTask logger = logging.getLogger(__name__) diff --git a/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py b/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py index 492de11659..81f3094b5c 100644 --- a/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py +++ b/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py @@ -6,10 +6,10 
@@ from datasets import Dataset from tqdm import tqdm -from ..AbsTask import AbsTask from ...encoder_interface import Encoder, EncoderWithQueryCorpusEncode from ...evaluation.evaluators import ImageTextPairClassificationEvaluator from ...load_results.mteb_results import ScoresDict +from ..AbsTask import AbsTask logger = logging.getLogger(__name__) diff --git a/mteb/abstasks/Image/AbsTaskZeroshotClassification.py b/mteb/abstasks/Image/AbsTaskZeroshotClassification.py index 9d5a55e235..4f23bb46b4 100644 --- a/mteb/abstasks/Image/AbsTaskZeroshotClassification.py +++ b/mteb/abstasks/Image/AbsTaskZeroshotClassification.py @@ -5,10 +5,10 @@ from datasets import Dataset -from ..AbsTask import AbsTask from ...encoder_interface import Encoder, EncoderWithQueryCorpusEncode from ...evaluation.evaluators import ZeroshotClassificationEvaluator from ...load_results.mteb_results import ScoresDict +from ..AbsTask import AbsTask logger = logging.getLogger(__name__) diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index 5db3d01c37..3181c5f5ac 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -4,8 +4,8 @@ from typing import Any import torch -from torch.nn.functional import normalize from PIL import Image +from torch.nn.functional import normalize from torch.utils.data import DataLoader from tqdm import tqdm from transformers import BlipForImageTextRetrieval, BlipProcessor @@ -22,7 +22,9 @@ def __init__( ): self.model_name = model_name self.device = device - self.model = BlipForImageTextRetrieval.from_pretrained(model_name).to(self.device) + self.model = BlipForImageTextRetrieval.from_pretrained(model_name).to( + self.device + ) self.processor = BlipProcessor.from_pretrained(model_name) def preprocess( @@ -47,7 +49,9 @@ def get_text_embeddings(self, texts: list[str], batch_size: int = 32): # different to CLIPModelWrapper: text_encoder instead of get_text_features and apply projection and normalization text_outputs = 
self.model.text_encoder(**inputs) text_outputs = text_outputs[0] - text_outputs = normalize(self.model.text_proj(text_outputs[:,0,:]), dim=-1) + text_outputs = normalize( + self.model.text_proj(text_outputs[:, 0, :]), dim=-1 + ) all_text_embeddings.append(text_outputs.cpu()) all_text_embeddings = torch.cat(all_text_embeddings, dim=0) @@ -67,7 +71,9 @@ def get_image_embeddings( inputs = {k: v.to(self.device) for k, v in inputs.items()} image_outputs = self.model.vision_model(**inputs) image_outputs = image_outputs[0] - image_outputs = normalize(self.model.vision_proj(image_outputs[:,0,:]), dim=-1) + image_outputs = normalize( + self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 + ) all_image_embeddings.append(image_outputs.cpu()) else: with torch.no_grad(): @@ -80,7 +86,9 @@ def get_image_embeddings( image_outputs = self.model.get_image_features(**inputs) image_outputs = self.model.vision_model(**inputs) image_outputs = image_outputs[0] - image_outputs = normalize(self.model.vision_proj(image_outputs[:,0,:]), dim=-1) + image_outputs = normalize( + self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 + ) all_image_embeddings.append(image_outputs.cpu()) all_image_embeddings = torch.cat(all_image_embeddings, dim=0) @@ -220,16 +228,13 @@ def get_fused_embeddings( if __name__ == "__main__": import mteb - mdl = mteb.get_model( - blip2_opt_2_7b.name, blip2_opt_2_7b.revision - ) + mdl = mteb.get_model(blip2_opt_2_7b.name, blip2_opt_2_7b.revision) emb = mdl.get_text_embeddings(["Hello, world!"]) emb2 = mdl.get_text_embeddings(["Hello there, world!"]) emb3 = mdl.get_text_embeddings(["Goodbye, person!"]) - + sim = torch.nn.functional.cosine_similarity(emb, emb2) print(sim) sim = torch.nn.functional.cosine_similarity(emb, emb3) print(sim) - diff --git a/mteb/models/blip_models.py b/mteb/models/blip_models.py index ead46b63e7..dff6014246 100644 --- a/mteb/models/blip_models.py +++ b/mteb/models/blip_models.py @@ -5,9 +5,10 @@ import torch from PIL import Image +from 
torch.nn.functional import normalize from torch.utils.data import DataLoader from tqdm import tqdm -from transformers import AutoModel, AutoProcessor +from transformers import BlipForImageTextRetrieval, BlipProcessor from mteb.model_meta import ModelMeta @@ -21,8 +22,10 @@ def __init__( ): self.model_name = model_name self.device = device - self.model = AutoModel.from_pretrained(model_name).to(self.device) - self.processor = AutoProcessor.from_pretrained(model_name) + self.model = BlipForImageTextRetrieval.from_pretrained(model_name).to( + self.device + ) + self.processor = BlipProcessor.from_pretrained(model_name) def preprocess( self, @@ -43,7 +46,12 @@ def get_text_embeddings(self, texts: list[str], batch_size: int = 32): text=batch_texts, return_tensors="pt", padding=True, truncation=True ) inputs = {k: v.to(self.device) for k, v in inputs.items()} - text_outputs = self.model.get_text_features(**inputs) + # different to CLIPModelWrapper: text_encoder instead of get_text_features and apply projection and normalization + text_outputs = self.model.text_encoder(**inputs) + text_outputs = text_outputs[0] + text_outputs = normalize( + self.model.text_proj(text_outputs[:, 0, :]), dim=-1 + ) all_text_embeddings.append(text_outputs.cpu()) all_text_embeddings = torch.cat(all_text_embeddings, dim=0) @@ -61,7 +69,11 @@ def get_image_embeddings( images=batch, return_tensors="pt", padding=True ) inputs = {k: v.to(self.device) for k, v in inputs.items()} - image_outputs = self.model.get_image_features(**inputs) + image_outputs = self.model.vision_model(**inputs) + image_outputs = image_outputs[0] + image_outputs = normalize( + self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 + ) all_image_embeddings.append(image_outputs.cpu()) else: with torch.no_grad(): @@ -72,6 +84,11 @@ def get_image_embeddings( ) inputs = {k: v.to(self.device) for k, v in inputs.items()} image_outputs = self.model.get_image_features(**inputs) + image_outputs = self.model.vision_model(**inputs) + 
image_outputs = image_outputs[0] + image_outputs = normalize( + self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 + ) all_image_embeddings.append(image_outputs.cpu()) all_image_embeddings = torch.cat(all_image_embeddings, dim=0) @@ -93,7 +110,6 @@ def get_fused_embeddings( fusion_mode="sum", batch_size: int = 32, ): - # TODO: find out if BLIP has a prescribed way of fusing text and image embeddings if texts is None and images is None: raise ValueError("Either texts or images must be provided") @@ -123,42 +139,6 @@ def get_fused_embeddings( return image_embeddings -""" -TODO: implement all model variants - -Salesforce/blip-image-captioning-large -Image-to-Text • Updated Dec 7, 2023 • -1.16M • -• -1.04k -Salesforce/blip-image-captioning-base -Image-to-Text • Updated Aug 1, 2023 • -857k • -• -475 -Salesforce/blip-vqa-base -Visual Question Answering • Updated Dec 7, 2023 • -168k • -119 -Salesforce/blip-vqa-capfilt-large -Visual Question Answering • Updated Jan 22 • -90.6k • -44 -Salesforce/blip-itm-base-coco -Updated Aug 1, 2023 • -12.8k • -16 -Salesforce/blip-itm-large-coco -Updated Aug 1, 2023 • -9.9k -Salesforce/blip-itm-base-flickr -Updated Aug 1, 2023 • -65 -Salesforce/blip-itm-large-flickr -Updated Aug 1, 2023 • -459 • -2 -""" # in descending order of usage (downloads from huggingface) blip_image_captioning_large = ModelMeta( loader=partial( @@ -261,8 +241,13 @@ def get_fused_embeddings( if __name__ == "__main__": import mteb - mdl = mteb.get_model( - blip_image_captioning_base.name, blip_image_captioning_base.revision - ) + mdl = mteb.get_model(blip_itm_base_coco.name, blip_itm_base_coco.revision) emb = mdl.get_text_embeddings(["Hello, world!"]) - print(emb.shape) + emb2 = mdl.get_text_embeddings(["Hello there, world!"]) + emb3 = mdl.get_text_embeddings(["Goodbye, person!"]) + + sim = torch.nn.functional.cosine_similarity(emb, emb2) + print(sim) + + sim = torch.nn.functional.cosine_similarity(emb, emb3) + print(sim) diff --git a/mteb/models/instructions.py 
b/mteb/models/instructions.py index 99054e41d7..4a31f8da02 100644 --- a/mteb/models/instructions.py +++ b/mteb/models/instructions.py @@ -2,8 +2,6 @@ from __future__ import annotations -from __future__ import annotations - import mteb # Prompts from diff --git a/mteb/models/ru_sentence_models.py b/mteb/models/ru_sentence_models.py index cffe7f7be4..30214c21f2 100644 --- a/mteb/models/ru_sentence_models.py +++ b/mteb/models/ru_sentence_models.py @@ -2,8 +2,6 @@ from __future__ import annotations -from __future__ import annotations - from functools import partial from mteb.model_meta import ModelMeta diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index 33ea17b165..a3603d9eb3 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -2,8 +2,6 @@ from __future__ import annotations -from __future__ import annotations - from mteb.model_meta import ModelMeta paraphrase_langs = [ diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py index eb65b82e79..417e5d6caa 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class CIRRIT2IRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py index b336549557..a58ed15dd5 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import 
TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class FashionIQIT2IRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py index 1fcf9f0cb9..817ea1c674 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py @@ -2,9 +2,8 @@ from datasets import concatenate_datasets, load_dataset -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata def _load_data(path: str, splits: str, cache_dir: str = None, revision: str = None): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py index 5b2b9bcaef..0a55e446ed 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py @@ -2,9 +2,8 @@ from datasets import concatenate_datasets, load_dataset -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata def _load_data(path: str, splits: str, cache_dir: str = None, revision: str = None): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py index 5029c51ec9..f7cb041bcb 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from 
mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class InfoSeekIT2ITRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py index cd08aa77b2..cc2b23ea88 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class InfoSeekIT2TRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py index af68e278b9..9247a12f88 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py @@ -2,9 +2,8 @@ from datasets import concatenate_datasets, load_dataset -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata def _load_data(path: str, splits: str, cache_dir: str = None, revision: str = None): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py index 7478ddddeb..f214bd2ea5 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py @@ -2,9 +2,8 @@ from datasets import concatenate_datasets, load_dataset -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata 
import TaskMetadata def _load_data(path: str, splits: str, cache_dir: str = None, revision: str = None): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py index 82dcf0894a..73d3f7c280 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class NIGHTSI2IRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py index 51d031241c..0f53eb7e6a 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class OVENIT2ITRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py index cfa07350ba..3df5b92625 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class OVENIT2TRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py 
b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py index fa0f5b5707..eb2c24aeb2 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py @@ -2,9 +2,8 @@ from datasets import load_dataset -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata def _load_data(path: str, splits: str, cache_dir: str = None, revision: str = None): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py index c6004e7840..e92bd637f5 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py @@ -2,9 +2,8 @@ from datasets import load_dataset -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata def _load_data(path: str, splits: str, cache_dir: str = None, revision: str = None): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py index 018f708ce5..7c7bddfe4c 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class TUBerlinT2IRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py index c1f1b306ca..2de1713097 100644 --- 
a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class VisualNewsI2TRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py index 7457f00d03..091d7a7f00 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class VisualNewsT2IRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py index 7086c1c205..50725b79b9 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class WebQAT2ITRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py index 6a4efb261a..14c9c02148 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py @@ -1,8 +1,7 @@ from __future__ import annotations -from 
mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata class WebQAT2TRetrieval(AbsTaskAny2AnyRetrieval): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py index a0395594a2..5de06b937f 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py @@ -2,10 +2,9 @@ from datasets import Dataset, DatasetDict, load_dataset -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval from mteb.abstasks.MultilingualTask import MultilingualTask +from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { "ar": ["ara-Arab"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py index 92f4a9c2c0..65c886f314 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py @@ -2,10 +2,9 @@ from datasets import DatasetDict, load_dataset -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval from mteb.abstasks.MultilingualTask import MultilingualTask +from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { "de": ["deu-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py index 8cb7f0e9d1..687c9f0446 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py @@ -2,10 +2,9 @@ from datasets 
import Dataset, DatasetDict, load_dataset -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval from mteb.abstasks.MultilingualTask import MultilingualTask +from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { "ar": ["ara-Arab"], diff --git a/mteb/tasks/Image/Clustering/eng/CIFAR.py b/mteb/tasks/Image/Clustering/eng/CIFAR.py index 01b493233c..e7f7a1d633 100644 --- a/mteb/tasks/Image/Clustering/eng/CIFAR.py +++ b/mteb/tasks/Image/Clustering/eng/CIFAR.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClustering import AbsTaskImageClustering +from mteb.abstasks.TaskMetadata import TaskMetadata class CIFAR10Clustering(AbsTaskImageClustering): diff --git a/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py b/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py index a104d51e13..38016e5e79 100644 --- a/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class BirdsnapClassification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/CIFAR.py b/mteb/tasks/Image/ImageClassification/eng/CIFAR.py index 75e3cdf6fc..9b4f45e387 100644 --- a/mteb/tasks/Image/ImageClassification/eng/CIFAR.py +++ b/mteb/tasks/Image/ImageClassification/eng/CIFAR.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class 
CIFAR10Classification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py b/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py index 0175cd8663..fe62f955b3 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class Caltech101Classification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py b/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py index 2f921e5587..25f6ba0401 100644 --- a/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class DTDClassification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py b/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py index b849d93c0b..4930c13d1b 100644 --- a/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class EuroSATClassification(AbsTaskImageClassification): diff --git 
a/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py b/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py index 2081683154..9db8b017f7 100644 --- a/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class FER2013Classification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py b/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py index bb09f32426..9b061e6dd1 100644 --- a/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class FGVCAircraftClassification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py b/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py index 533b2c2145..04389db8f1 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class Food101Classification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py 
b/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py index 82de6fab16..f3831abdb4 100644 --- a/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class MNISTClassification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py b/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py index dce55d9362..c0a10de48d 100644 --- a/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class OxfordFlowersClassification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py b/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py index 0277098d64..cf537648ed 100644 --- a/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class OxfordPetsClassification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py b/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py index 
e883db4c6e..afbc8fe1da 100644 --- a/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class RESISC45Classification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py b/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py index 9b9fcf3ef4..9531e1c1f6 100644 --- a/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class STL10Classification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py b/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py index 414f3560e6..eef0ccbfcb 100644 --- a/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class SUN397Classification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py b/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py index 1fa4f64af2..e4561b2165 100644 --- a/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py +++ 
b/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py @@ -1,8 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks.TaskMetadata import TaskMetadata class StanfordCarsClassification(AbsTaskImageClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/Birdsnap.py b/mteb/tasks/Image/ZeroshotClassification/eng/Birdsnap.py index 9273b66add..ed31e3f89f 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/Birdsnap.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/Birdsnap.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class BirdsnapClassification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/CIFAR.py b/mteb/tasks/Image/ZeroshotClassification/eng/CIFAR.py index 517bf565cc..81103a0f1d 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/CIFAR.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/CIFAR.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class CIFAR10ZeroShotClassification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/Caltech101.py b/mteb/tasks/Image/ZeroshotClassification/eng/Caltech101.py index f07c423939..ab7ca141cb 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/Caltech101.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/Caltech101.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from 
mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class Caltech101Classification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/DTD.py b/mteb/tasks/Image/ZeroshotClassification/eng/DTD.py index caea933534..27ef0a6f3d 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/DTD.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/DTD.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class DTDClassification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/EuroSAT.py b/mteb/tasks/Image/ZeroshotClassification/eng/EuroSAT.py index 275487580d..de6fb4c434 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/EuroSAT.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/EuroSAT.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class EuroSATClassification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/FER2013.py b/mteb/tasks/Image/ZeroshotClassification/eng/FER2013.py index febbb27e5e..9cfa0dd3e9 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/FER2013.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/FER2013.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class FER2013Classification(AbsTaskZeroshotClassification): diff --git 
a/mteb/tasks/Image/ZeroshotClassification/eng/FGVCAircraft.py b/mteb/tasks/Image/ZeroshotClassification/eng/FGVCAircraft.py index 833afde477..c15e0b6d4b 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/FGVCAircraft.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/FGVCAircraft.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class FGVCAircraftClassification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/Food101.py b/mteb/tasks/Image/ZeroshotClassification/eng/Food101.py index a2b93c2471..fd073ac412 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/Food101.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/Food101.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class Food101Classification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/MNIST.py b/mteb/tasks/Image/ZeroshotClassification/eng/MNIST.py index f343cb9211..253fa938ac 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/MNIST.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/MNIST.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class MNISTClassification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/OxfordPets.py b/mteb/tasks/Image/ZeroshotClassification/eng/OxfordPets.py index 2145fe8bff..3da580af1b 100644 --- 
a/mteb/tasks/Image/ZeroshotClassification/eng/OxfordPets.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/OxfordPets.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class OxfordPetsClassification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/RESISC45.py b/mteb/tasks/Image/ZeroshotClassification/eng/RESISC45.py index 7ba9824455..d6fb98ba6c 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/RESISC45.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/RESISC45.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class RESISC45Classification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/STL10.py b/mteb/tasks/Image/ZeroshotClassification/eng/STL10.py index 11c53d5032..8b0f42d08d 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/STL10.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/STL10.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class STL10Classification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/SUN397.py b/mteb/tasks/Image/ZeroshotClassification/eng/SUN397.py index c3e67879b0..64252584b8 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/SUN397.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/SUN397.py @@ -1,10 +1,9 @@ from __future__ import annotations -from 
mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class SUN397Classification(AbsTaskZeroshotClassification): diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/StanfordCars.py b/mteb/tasks/Image/ZeroshotClassification/eng/StanfordCars.py index 0e881b65f0..c8cc639a4e 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/StanfordCars.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/StanfordCars.py @@ -1,10 +1,9 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata - from mteb.abstasks.Image.AbsTaskZeroshotClassification import ( AbsTaskZeroshotClassification, ) +from mteb.abstasks.TaskMetadata import TaskMetadata class StanfordCarsClassification(AbsTaskZeroshotClassification): From 236a94f67aa630e4cd2295f481f886e08c98d310 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 13 Sep 2024 16:39:39 +0100 Subject: [PATCH 05/73] wip: implement blip2 wrapper --- mteb/models/__init__.py | 2 + mteb/models/blip2_models.py | 257 ++++++++++++++++++------------------ 2 files changed, 133 insertions(+), 126 deletions(-) diff --git a/mteb/models/__init__.py b/mteb/models/__init__.py index 94358143c1..2229b70239 100644 --- a/mteb/models/__init__.py +++ b/mteb/models/__init__.py @@ -11,6 +11,7 @@ align_models, bge_models, blip_models, + blip2_models, bm25, clip_models, cohere_models, @@ -132,6 +133,7 @@ def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMe align_models, bge_models, blip_models, + blip2_models, bm25, cohere_models, dino_models, diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index 3181c5f5ac..16acabc0ef 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -8,140 +8,145 @@ from torch.nn.functional import normalize from torch.utils.data import DataLoader from tqdm import tqdm -from transformers 
import BlipForImageTextRetrieval, BlipProcessor +from transformers import Blip2Processor from mteb.model_meta import ModelMeta +def blip2_loader(**kwargs): + try: # a temporal fix for the dependency issues of vista models. + from lavis.models import load_model_and_preprocess -class BLIP2ModelWrapper: - def __init__( - self, - model_name: str, - device: str = "cuda" if torch.cuda.is_available() else "cpu", - **kwargs: Any, - ): - self.model_name = model_name - self.device = device - self.model = BlipForImageTextRetrieval.from_pretrained(model_name).to( - self.device - ) - self.processor = BlipProcessor.from_pretrained(model_name) - - def preprocess( - self, - texts: list[str], - images: list[Image.Image], - ): - return self.processor( - text=texts, images=images, return_tensors="pt", padding=True + except ImportError: + raise ImportError( + "Please install `pip install salesforce-lavis` to use BLIP-2 models." ) + + class BLIP2ModelWrapper: + def __init__( + self, + model_name: str, + device: str = "cuda" if torch.cuda.is_available() else "cpu", + **kwargs: Any, + ): + self.model_name = model_name + self.device = device + self.model, self.vis_processors, self.txt_processors = load_model_and_preprocess(name="blip2-opt-2.7b", model_type="base") + self.model = self.model.to(self.device) + self.processor = Blip2Processor.from_pretrained(model_name) + + def preprocess( + self, + texts: list[str], + images: list[Image.Image], + ): + return self.processor( + text=texts, images=images, return_tensors="pt", padding=True + ) + + def get_text_embeddings(self, texts: list[str], batch_size: int = 32): + all_text_embeddings = [] - def get_text_embeddings(self, texts: list[str], batch_size: int = 32): - all_text_embeddings = [] - - with torch.no_grad(): - for i in tqdm(range(0, len(texts), batch_size)): - batch_texts = texts[i : i + batch_size] - inputs = self.processor( - text=batch_texts, return_tensors="pt", padding=True, truncation=True - ) - inputs = {k: v.to(self.device) for 
k, v in inputs.items()} - # different to CLIPModelWrapper: text_encoder instead of get_text_features and apply projection and normalization - text_outputs = self.model.text_encoder(**inputs) - text_outputs = text_outputs[0] - text_outputs = normalize( - self.model.text_proj(text_outputs[:, 0, :]), dim=-1 - ) - all_text_embeddings.append(text_outputs.cpu()) - - all_text_embeddings = torch.cat(all_text_embeddings, dim=0) - return all_text_embeddings - - def get_image_embeddings( - self, images: list[Image.Image] | DataLoader, batch_size: int = 32 - ): - all_image_embeddings = [] - - if isinstance(images, DataLoader): - with torch.no_grad(): - for batch in tqdm(images): - inputs = self.processor( - images=batch, return_tensors="pt", padding=True - ) - inputs = {k: v.to(self.device) for k, v in inputs.items()} - image_outputs = self.model.vision_model(**inputs) - image_outputs = image_outputs[0] - image_outputs = normalize( - self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 - ) - all_image_embeddings.append(image_outputs.cpu()) - else: with torch.no_grad(): - for i in tqdm(range(0, len(images), batch_size)): - batch_images = images[i : i + batch_size] + for i in tqdm(range(0, len(texts), batch_size)): + batch_texts = texts[i : i + batch_size] inputs = self.processor( - images=batch_images, return_tensors="pt", padding=True + text=batch_texts, return_tensors="pt", padding=True, truncation=True ) inputs = {k: v.to(self.device) for k, v in inputs.items()} - image_outputs = self.model.get_image_features(**inputs) - image_outputs = self.model.vision_model(**inputs) - image_outputs = image_outputs[0] - image_outputs = normalize( - self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 - ) - all_image_embeddings.append(image_outputs.cpu()) - - all_image_embeddings = torch.cat(all_image_embeddings, dim=0) - return all_image_embeddings - - def calculate_probs(self, text_embeddings, image_embeddings): - text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, 
keepdim=True) - image_embeddings = image_embeddings / image_embeddings.norm( - dim=-1, keepdim=True - ) - logits = torch.matmul(image_embeddings, text_embeddings.T) - probs = (logits * 100).softmax(dim=-1) - return probs - - def get_fused_embeddings( - self, - texts: list[str] = None, - images: list[Image.Image] | DataLoader = None, - fusion_mode="sum", - batch_size: int = 32, - ): - # TODO: find out if BLIP has a prescribed way of fusing text and image embeddings - if texts is None and images is None: - raise ValueError("Either texts or images must be provided") - - text_embeddings = None - image_embeddings = None - - if texts is not None: - text_embeddings = self.get_text_embeddings(texts, batch_size) - - if images is not None: - image_embeddings = self.get_image_embeddings(images, batch_size) - - if text_embeddings is not None and image_embeddings is not None: - if len(text_embeddings) != len(image_embeddings): - raise ValueError( - "The number of texts and images must have the same length" - ) - if fusion_mode == "sum": - fused_embeddings = text_embeddings + image_embeddings + + text_outputs = self.model.forward_text(**inputs) + text_outputs = torch.functional.normalize(self.model.text_proj(text_outputs)) + all_text_embeddings.append(text_outputs.cpu()) + + all_text_embeddings = torch.cat(all_text_embeddings, dim=0) + return all_text_embeddings + + def get_image_embeddings( + self, images: list[Image.Image] | DataLoader, batch_size: int = 32 + ): + all_image_embeddings = [] + + if isinstance(images, DataLoader): + with torch.no_grad(): + for batch in tqdm(images): + inputs = self.processor( + images=batch, return_tensors="pt", padding=True + ) + inputs = {k: v.to(self.device) for k, v in inputs.items()} + image_outputs = self.model.vision_model(**inputs) + image_outputs = image_outputs[0] + image_outputs = normalize( + self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 + ) + all_image_embeddings.append(image_outputs.cpu()) else: - # to do: add other fusion 
mode - raise ValueError(f"fusion mode {fusion_mode} hasn't been implemented") - return fused_embeddings - elif text_embeddings is not None: - return text_embeddings - elif image_embeddings is not None: - return image_embeddings + with torch.no_grad(): + for i in tqdm(range(0, len(images), batch_size)): + batch_images = images[i : i + batch_size] + inputs = self.processor( + images=batch_images, return_tensors="pt", padding=True + ) + inputs = {k: v.to(self.device) for k, v in inputs.items()} + image_outputs = self.model.get_image_features(**inputs) + image_outputs = self.model.vision_model(**inputs) + image_outputs = image_outputs[0] + image_outputs = normalize( + self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 + ) + all_image_embeddings.append(image_outputs.cpu()) + + all_image_embeddings = torch.cat(all_image_embeddings, dim=0) + return all_image_embeddings + + def calculate_probs(self, text_embeddings, image_embeddings): + text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True) + image_embeddings = image_embeddings / image_embeddings.norm( + dim=-1, keepdim=True + ) + logits = torch.matmul(image_embeddings, text_embeddings.T) + probs = (logits * 100).softmax(dim=-1) + return probs + + def get_fused_embeddings( + self, + texts: list[str] = None, + images: list[Image.Image] | DataLoader = None, + fusion_mode="sum", + batch_size: int = 32, + ): + # TODO: find out if BLIP has a prescribed way of fusing text and image embeddings + if texts is None and images is None: + raise ValueError("Either texts or images must be provided") + + text_embeddings = None + image_embeddings = None + + if texts is not None: + text_embeddings = self.get_text_embeddings(texts, batch_size) + + if images is not None: + image_embeddings = self.get_image_embeddings(images, batch_size) + + if text_embeddings is not None and image_embeddings is not None: + if len(text_embeddings) != len(image_embeddings): + raise ValueError( + "The number of texts and images must 
have the same length" + ) + if fusion_mode == "sum": + fused_embeddings = text_embeddings + image_embeddings + else: + # to do: add other fusion mode + raise ValueError(f"fusion mode {fusion_mode} hasn't been implemented") + return fused_embeddings + elif text_embeddings is not None: + return text_embeddings + elif image_embeddings is not None: + return image_embeddings + + return BLIP2ModelWrapper(**kwargs) """ - Salesforce/blip2-opt-2.7b Image-to-Text • Updated Mar 22 • 588k • @@ -167,7 +172,7 @@ def get_fused_embeddings( blip2_opt_2_7b = ModelMeta( loader=partial( - BLIP2ModelWrapper, + blip2_loader, model_name="Salesforce/blip2-opt-2.7b", ), name="Salesforce/blip2-opt-2.7b", @@ -179,7 +184,7 @@ def get_fused_embeddings( blip2_flan_t5_xxl = ModelMeta( loader=partial( - BLIP2ModelWrapper, + blip2_loader, model_name="Salesforce/blip2-flan-t5-xxl", ), name="Salesforce/blip2-flan-t5-xxl", @@ -191,7 +196,7 @@ def get_fused_embeddings( blip2_opt_6_7b_coco = ModelMeta( loader=partial( - BLIP2ModelWrapper, + blip2_loader, model_name="Salesforce/blip2-opt-6.7b-coco", ), name="Salesforce/blip2-opt-6.7b-coco", @@ -203,7 +208,7 @@ def get_fused_embeddings( blip2_opt_6_7b = ModelMeta( loader=partial( - BLIP2ModelWrapper, + blip2_loader, model_name="Salesforce/blip2-opt-6.7b", ), name="Salesforce/blip2-opt-6.7b", @@ -215,7 +220,7 @@ def get_fused_embeddings( blip2_flan_t5_xl = ModelMeta( loader=partial( - BLIP2ModelWrapper, + blip2_loader, model_name="Salesforce/blip2-flan-t5-xl", ), name="Salesforce/blip2-flan-t5-xl", @@ -228,7 +233,7 @@ def get_fused_embeddings( if __name__ == "__main__": import mteb - mdl = mteb.get_model(blip2_opt_2_7b.name, blip2_opt_2_7b.revision) + mdl = mteb.get_model(blip2_opt_2_7b.name, blip2_opt_2_7b.revision, device="cpu") emb = mdl.get_text_embeddings(["Hello, world!"]) emb2 = mdl.get_text_embeddings(["Hello there, world!"]) emb3 = mdl.get_text_embeddings(["Goodbye, person!"]) From 1f2f8c3d0960cd0af18620060262a53bfbe91f5c Mon Sep 17 00:00:00 2001 
From: Jamie-Stirling Date: Sun, 15 Sep 2024 21:18:05 +0100 Subject: [PATCH 06/73] feat: add blip2 models, still mismatched names --- mteb/models/blip2_models.py | 133 ++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 67 deletions(-) diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index 16acabc0ef..12dc1cfa51 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -2,20 +2,21 @@ from functools import partial from typing import Any +from types import SimpleNamespace import torch from PIL import Image from torch.nn.functional import normalize from torch.utils.data import DataLoader from tqdm import tqdm -from transformers import Blip2Processor +from transformers import Blip2Processor, BertTokenizer from mteb.model_meta import ModelMeta def blip2_loader(**kwargs): try: # a temporal fix for the dependency issues of vista models. from lavis.models import load_model_and_preprocess - + from lavis.models.blip2_models.blip2_image_text_matching import Blip2ITM, Blip2Qformer except ImportError: raise ImportError( "Please install `pip install salesforce-lavis` to use BLIP-2 models." 
@@ -30,8 +31,7 @@ def __init__( ): self.model_name = model_name self.device = device - self.model, self.vis_processors, self.txt_processors = load_model_and_preprocess(name="blip2-opt-2.7b", model_type="base") - self.model = self.model.to(self.device) + self.model = Blip2ITM.from_pretrained("pretrain").to(self.device).float() self.processor = Blip2Processor.from_pretrained(model_name) def preprocess( @@ -49,13 +49,15 @@ def get_text_embeddings(self, texts: list[str], batch_size: int = 32): with torch.no_grad(): for i in tqdm(range(0, len(texts), batch_size)): batch_texts = texts[i : i + batch_size] - inputs = self.processor( - text=batch_texts, return_tensors="pt", padding=True, truncation=True - ) - inputs = {k: v.to(self.device) for k, v in inputs.items()} - - text_outputs = self.model.forward_text(**inputs) - text_outputs = torch.functional.normalize(self.model.text_proj(text_outputs)) + text_tokens = self.model.tokenizer( + batch_texts, + padding="max_length", + truncation=True, + max_length=self.model.max_txt_len, + return_tensors="pt", + ).to(self.device) + text_outputs = self.model.forward_text(text_tokens) + text_outputs = normalize(self.model.text_proj(text_outputs)) all_text_embeddings.append(text_outputs.cpu()) all_text_embeddings = torch.cat(all_text_embeddings, dim=0) @@ -72,8 +74,7 @@ def get_image_embeddings( inputs = self.processor( images=batch, return_tensors="pt", padding=True ) - inputs = {k: v.to(self.device) for k, v in inputs.items()} - image_outputs = self.model.vision_model(**inputs) + image_outputs = self.model.forward_image(inputs["pixel_values"].to(self.device)) image_outputs = image_outputs[0] image_outputs = normalize( self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 @@ -85,10 +86,8 @@ def get_image_embeddings( batch_images = images[i : i + batch_size] inputs = self.processor( images=batch_images, return_tensors="pt", padding=True - ) - inputs = {k: v.to(self.device) for k, v in inputs.items()} - image_outputs = 
self.model.get_image_features(**inputs) - image_outputs = self.model.vision_model(**inputs) + )["pixel_values"].to(self.device) + image_outputs = self.model.forward_image(inputs) image_outputs = image_outputs[0] image_outputs = normalize( self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 @@ -98,6 +97,43 @@ def get_image_embeddings( all_image_embeddings = torch.cat(all_image_embeddings, dim=0) return all_image_embeddings + def get_multimodal_embeddings( + self, texts, images, batch_size + ): + all_multimodal_embeddings = [] + + with torch.no_grad(): + if isinstance(images, DataLoader): + for batch_images, i in tqdm(zip(images, range(0, len(texts), batch_size))): + batch_texts = texts[i : i + batch_size] + + image_inputs = self.processor( + images=batch_images, return_tensors="pt", padding=True + )["pixel_values"].to(self.device) + multimodal_outputs = self.model.extract_features({ + "text_input": batch_texts, + "image": image_inputs + }).multimodal_embeds + + all_multimodal_embeddings.append(multimodal_outputs.cpu()) + else: + for i in tqdm(range(0, len(texts), batch_size)): + batch_images = images[i : i + batch_size] + batch_texts = texts[i : i + batch_size] + + image_inputs = self.processor( + images=batch_images, return_tensors="pt", padding=True + )["pixel_values"].to(self.device) + multimodal_outputs = self.model.extract_features({ + "text_input": batch_texts, + "image": image_inputs + }).multimodal_embeds + + all_multimodal_embeddings.append(multimodal_outputs.cpu()) + + + return torch.cat(all_multimodal_embeddings, dim=0) + def calculate_probs(self, text_embeddings, image_embeddings): text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True) image_embeddings = image_embeddings / image_embeddings.norm( @@ -111,7 +147,7 @@ def get_fused_embeddings( self, texts: list[str] = None, images: list[Image.Image] | DataLoader = None, - fusion_mode="sum", + fusion_mode="multimodal", batch_size: int = 32, ): # TODO: find out if BLIP has a 
prescribed way of fusing text and image embeddings @@ -134,6 +170,8 @@ def get_fused_embeddings( ) if fusion_mode == "sum": fused_embeddings = text_embeddings + image_embeddings + if fusion_mode == "multimodal": + fused_embeddings = self.get_multimodal_embeddings(texts, images, batch_size) else: # to do: add other fusion mode raise ValueError(f"fusion mode {fusion_mode} hasn't been implemented") @@ -170,7 +208,7 @@ def get_fused_embeddings( """ # in descending order of usage (downloads from huggingface) -blip2_opt_2_7b = ModelMeta( +blip2_image_text_matching = ModelMeta( loader=partial( blip2_loader, model_name="Salesforce/blip2-opt-2.7b", @@ -182,58 +220,12 @@ def get_fused_embeddings( release_date="2024-03-22", ) -blip2_flan_t5_xxl = ModelMeta( - loader=partial( - blip2_loader, - model_name="Salesforce/blip2-flan-t5-xxl", - ), - name="Salesforce/blip2-flan-t5-xxl", - languages=["eng_Latn"], - open_source=True, - revision="43206cbc865b9d5b3dd7d080e5d94b4143ca8e74", - release_date="2024-03-29", -) - -blip2_opt_6_7b_coco = ModelMeta( - loader=partial( - blip2_loader, - model_name="Salesforce/blip2-opt-6.7b-coco", - ), - name="Salesforce/blip2-opt-6.7b-coco", - languages=["eng_Latn"], - open_source=True, - revision="0d580de59320a25a4d2c386387bcef310d5f286e", - release_date="2024-03-31", -) - -blip2_opt_6_7b = ModelMeta( - loader=partial( - blip2_loader, - model_name="Salesforce/blip2-opt-6.7b", - ), - name="Salesforce/blip2-opt-6.7b", - languages=["eng_Latn"], - open_source=True, - revision="1d33d60155fd1323b97556e0f1dd5148a9749f5b", - release_date="2024-03-27", -) - -blip2_flan_t5_xl = ModelMeta( - loader=partial( - blip2_loader, - model_name="Salesforce/blip2-flan-t5-xl", - ), - name="Salesforce/blip2-flan-t5-xl", - languages=["eng_Latn"], - open_source=True, - revision="e5025a34e3e769e72e2aab7f7bfd00bc84d5fd77", - release_date="2023-12-13", -) if __name__ == "__main__": import mteb + import PIL.Image - mdl = mteb.get_model(blip2_opt_2_7b.name, 
blip2_opt_2_7b.revision, device="cpu") + mdl = mteb.get_model(blip2_image_text_matching.name, blip2_image_text_matching.revision, device="cpu") emb = mdl.get_text_embeddings(["Hello, world!"]) emb2 = mdl.get_text_embeddings(["Hello there, world!"]) emb3 = mdl.get_text_embeddings(["Goodbye, person!"]) @@ -243,3 +235,10 @@ def get_fused_embeddings( sim = torch.nn.functional.cosine_similarity(emb, emb3) print(sim) + + cat_img = Image.open("cat.jpg") + cat_text = "An image of a cat" + + multi_emv = mdl.get_multimodal_embeddings([cat_text], [cat_img], 32) + + From 8c6486087ad2790b724c03110b149e337f77b9b0 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Sun, 15 Sep 2024 21:28:31 +0100 Subject: [PATCH 07/73] fix: remove projections from image and text embeddings --- mteb/models/blip2_models.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index 12dc1cfa51..86f0676b0e 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -57,7 +57,7 @@ def get_text_embeddings(self, texts: list[str], batch_size: int = 32): return_tensors="pt", ).to(self.device) text_outputs = self.model.forward_text(text_tokens) - text_outputs = normalize(self.model.text_proj(text_outputs)) + #text_outputs = normalize(self.model.text_proj(text_outputs)) all_text_embeddings.append(text_outputs.cpu()) all_text_embeddings = torch.cat(all_text_embeddings, dim=0) @@ -75,10 +75,8 @@ def get_image_embeddings( images=batch, return_tensors="pt", padding=True ) image_outputs = self.model.forward_image(inputs["pixel_values"].to(self.device)) - image_outputs = image_outputs[0] - image_outputs = normalize( - self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 - ) + image_outputs = image_outputs[0][:, 0, :] + #image_outputs = normalize(self.model.vision_proj(image_outputs), dim=-1) all_image_embeddings.append(image_outputs.cpu()) else: with torch.no_grad(): @@ -98,7 +96,7 @@ def 
get_image_embeddings( return all_image_embeddings def get_multimodal_embeddings( - self, texts, images, batch_size + self, texts, images, batch_size=32 ): all_multimodal_embeddings = [] @@ -113,7 +111,7 @@ def get_multimodal_embeddings( multimodal_outputs = self.model.extract_features({ "text_input": batch_texts, "image": image_inputs - }).multimodal_embeds + }).multimodal_embeds[:,0,:] all_multimodal_embeddings.append(multimodal_outputs.cpu()) else: @@ -127,7 +125,7 @@ def get_multimodal_embeddings( multimodal_outputs = self.model.extract_features({ "text_input": batch_texts, "image": image_inputs - }).multimodal_embeds + }).multimodal_embeds[:,0,:] all_multimodal_embeddings.append(multimodal_outputs.cpu()) @@ -239,6 +237,16 @@ def get_fused_embeddings( cat_img = Image.open("cat.jpg") cat_text = "An image of a cat" - multi_emv = mdl.get_multimodal_embeddings([cat_text], [cat_img], 32) + multi_cat_emb = mdl.get_multimodal_embeddings([cat_text], [cat_img]) + text_cat_emb = mdl.get_text_embeddings(["An photo of a cat"]) + text_dog_emb = mdl.get_text_embeddings(["An image of a dog"]) + + print(multi_cat_emb.shape) + + sim1 = torch.nn.functional.cosine_similarity(multi_cat_emb, text_cat_emb) + sim2 = torch.nn.functional.cosine_similarity(multi_cat_emb, text_dog_emb) + + print(sim1, sim2) + From 20839ca93b114bf4a5011aeaa41846a9f0f32482 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Sun, 15 Sep 2024 21:52:30 +0100 Subject: [PATCH 08/73] make lint --- mteb/models/__init__.py | 2 +- mteb/models/blip2_models.py | 95 +++++++++++++++---------------------- 2 files changed, 40 insertions(+), 57 deletions(-) diff --git a/mteb/models/__init__.py b/mteb/models/__init__.py index 2229b70239..eabe5a2d3f 100644 --- a/mteb/models/__init__.py +++ b/mteb/models/__init__.py @@ -10,8 +10,8 @@ from mteb.models import ( align_models, bge_models, - blip_models, blip2_models, + blip_models, bm25, clip_models, cohere_models, diff --git a/mteb/models/blip2_models.py 
b/mteb/models/blip2_models.py index 86f0676b0e..b735b65cf3 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -2,26 +2,29 @@ from functools import partial from typing import Any -from types import SimpleNamespace import torch from PIL import Image from torch.nn.functional import normalize from torch.utils.data import DataLoader from tqdm import tqdm -from transformers import Blip2Processor, BertTokenizer +from transformers import Blip2Processor from mteb.model_meta import ModelMeta + def blip2_loader(**kwargs): try: # a temporal fix for the dependency issues of vista models. from lavis.models import load_model_and_preprocess - from lavis.models.blip2_models.blip2_image_text_matching import Blip2ITM, Blip2Qformer + from lavis.models.blip2_models.blip2_image_text_matching import ( + Blip2ITM, + Blip2Qformer, + ) except ImportError: raise ImportError( "Please install `pip install salesforce-lavis` to use BLIP-2 models." ) - + class BLIP2ModelWrapper: def __init__( self, @@ -57,7 +60,7 @@ def get_text_embeddings(self, texts: list[str], batch_size: int = 32): return_tensors="pt", ).to(self.device) text_outputs = self.model.forward_text(text_tokens) - #text_outputs = normalize(self.model.text_proj(text_outputs)) + # text_outputs = normalize(self.model.text_proj(text_outputs)) all_text_embeddings.append(text_outputs.cpu()) all_text_embeddings = torch.cat(all_text_embeddings, dim=0) @@ -74,9 +77,11 @@ def get_image_embeddings( inputs = self.processor( images=batch, return_tensors="pt", padding=True ) - image_outputs = self.model.forward_image(inputs["pixel_values"].to(self.device)) + image_outputs = self.model.forward_image( + inputs["pixel_values"].to(self.device) + ) image_outputs = image_outputs[0][:, 0, :] - #image_outputs = normalize(self.model.vision_proj(image_outputs), dim=-1) + # image_outputs = normalize(self.model.vision_proj(image_outputs), dim=-1) all_image_embeddings.append(image_outputs.cpu()) else: with torch.no_grad(): @@ -95,23 
+100,22 @@ def get_image_embeddings( all_image_embeddings = torch.cat(all_image_embeddings, dim=0) return all_image_embeddings - def get_multimodal_embeddings( - self, texts, images, batch_size=32 - ): + def get_multimodal_embeddings(self, texts, images, batch_size=32): all_multimodal_embeddings = [] with torch.no_grad(): if isinstance(images, DataLoader): - for batch_images, i in tqdm(zip(images, range(0, len(texts), batch_size))): + for batch_images, i in tqdm( + zip(images, range(0, len(texts), batch_size)) + ): batch_texts = texts[i : i + batch_size] - - image_inputs = self.processor( + + image_inputs = self.processor( images=batch_images, return_tensors="pt", padding=True )["pixel_values"].to(self.device) - multimodal_outputs = self.model.extract_features({ - "text_input": batch_texts, - "image": image_inputs - }).multimodal_embeds[:,0,:] + multimodal_outputs = self.model.extract_features( + {"text_input": batch_texts, "image": image_inputs} + ).multimodal_embeds[:, 0, :] all_multimodal_embeddings.append(multimodal_outputs.cpu()) else: @@ -119,21 +123,21 @@ def get_multimodal_embeddings( batch_images = images[i : i + batch_size] batch_texts = texts[i : i + batch_size] - image_inputs = self.processor( + image_inputs = self.processor( images=batch_images, return_tensors="pt", padding=True )["pixel_values"].to(self.device) - multimodal_outputs = self.model.extract_features({ - "text_input": batch_texts, - "image": image_inputs - }).multimodal_embeds[:,0,:] + multimodal_outputs = self.model.extract_features( + {"text_input": batch_texts, "image": image_inputs} + ).multimodal_embeds[:, 0, :] all_multimodal_embeddings.append(multimodal_outputs.cpu()) - return torch.cat(all_multimodal_embeddings, dim=0) def calculate_probs(self, text_embeddings, image_embeddings): - text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True) + text_embeddings = text_embeddings / text_embeddings.norm( + dim=-1, keepdim=True + ) image_embeddings = image_embeddings / 
image_embeddings.norm( dim=-1, keepdim=True ) @@ -169,42 +173,22 @@ def get_fused_embeddings( if fusion_mode == "sum": fused_embeddings = text_embeddings + image_embeddings if fusion_mode == "multimodal": - fused_embeddings = self.get_multimodal_embeddings(texts, images, batch_size) + fused_embeddings = self.get_multimodal_embeddings( + texts, images, batch_size + ) else: # to do: add other fusion mode - raise ValueError(f"fusion mode {fusion_mode} hasn't been implemented") + raise ValueError( + f"fusion mode {fusion_mode} hasn't been implemented" + ) return fused_embeddings elif text_embeddings is not None: return text_embeddings elif image_embeddings is not None: return image_embeddings - - return BLIP2ModelWrapper(**kwargs) + return BLIP2ModelWrapper(**kwargs) -""" -Salesforce/blip2-opt-2.7b -Image-to-Text • Updated Mar 22 • -588k • -296 -Salesforce/blip2-flan-t5-xxl -Image-to-Text • Updated Mar 29 • -9.23k • -84 -Salesforce/blip2-opt-6.7b-coco -Image-to-Text • Updated Mar 31 • -1.51k • -28 -Salesforce/blip2-opt-6.7b -Image-to-Text • Updated Mar 27 • -4.93k • -71 -Salesforce/blip2-flan-t5-xl -Image-to-Text • Updated Dec 13, 2023 • -95.9k • -56 -""" -# in descending order of usage (downloads from huggingface) blip2_image_text_matching = ModelMeta( loader=partial( @@ -220,10 +204,12 @@ def get_fused_embeddings( if __name__ == "__main__": + import mteb - import PIL.Image - mdl = mteb.get_model(blip2_image_text_matching.name, blip2_image_text_matching.revision, device="cpu") + mdl = mteb.get_model( + blip2_image_text_matching.name, blip2_image_text_matching.revision, device="cpu" + ) emb = mdl.get_text_embeddings(["Hello, world!"]) emb2 = mdl.get_text_embeddings(["Hello there, world!"]) emb3 = mdl.get_text_embeddings(["Goodbye, person!"]) @@ -247,6 +233,3 @@ def get_fused_embeddings( sim2 = torch.nn.functional.cosine_similarity(multi_cat_emb, text_dog_emb) print(sim1, sim2) - - - From ec47c690261169ac8d197af476eccb0ae32a187d Mon Sep 17 00:00:00 2001 From: 
Jamie-Stirling Date: Sun, 15 Sep 2024 22:17:35 +0100 Subject: [PATCH 09/73] wip: add coco BLIP2 --- mteb/models/blip2_models.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index b735b65cf3..aedb03e24f 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -18,7 +18,6 @@ def blip2_loader(**kwargs): from lavis.models import load_model_and_preprocess from lavis.models.blip2_models.blip2_image_text_matching import ( Blip2ITM, - Blip2Qformer, ) except ImportError: raise ImportError( @@ -34,7 +33,8 @@ def __init__( ): self.model_name = model_name self.device = device - self.model = Blip2ITM.from_pretrained("pretrain").to(self.device).float() + model_type = "coco" if "coco" in model_name else "pretrain" + self.model = Blip2ITM.from_pretrained(model_type).to(self.device).float() self.processor = Blip2Processor.from_pretrained(model_name) def preprocess( @@ -190,7 +190,7 @@ def get_fused_embeddings( return BLIP2ModelWrapper(**kwargs) -blip2_image_text_matching = ModelMeta( +blip2_opt_2_7b = ModelMeta( loader=partial( blip2_loader, model_name="Salesforce/blip2-opt-2.7b", @@ -202,13 +202,25 @@ def get_fused_embeddings( release_date="2024-03-22", ) +blip2_opt_6_7b_coco = ModelMeta( + loader=partial( + blip2_loader, + model_name="Salesforce/blip2-opt-6.7b-coco", + ), + name="Salesforce/blip2-opt-6.7b-coco", + languages=["eng_Latn"], + open_source=True, + revision="0d580de59320a25a4d2c386387bcef310d5f286e", + release_date="2024-03-31", +) + if __name__ == "__main__": import mteb mdl = mteb.get_model( - blip2_image_text_matching.name, blip2_image_text_matching.revision, device="cpu" + blip2_opt_2_7b.name, blip2_opt_2_7b.revision, device="cpu" ) emb = mdl.get_text_embeddings(["Hello, world!"]) emb2 = mdl.get_text_embeddings(["Hello there, world!"]) From e8f4ae1b6cdc455c7ac06d69bf6433936fac1ef4 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 16 Sep 
2024 12:06:47 +0100 Subject: [PATCH 10/73] fix: BLIP2 better zero-shot classification without text_proj and vision_proj --- mteb/models/blip2_models.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index aedb03e24f..9cac90d6f0 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -35,6 +35,8 @@ def __init__( self.device = device model_type = "coco" if "coco" in model_name else "pretrain" self.model = Blip2ITM.from_pretrained(model_type).to(self.device).float() + # print numbr of parameters + print(f"Number of parameters: {sum(p.numel() for p in self.model.parameters())}") self.processor = Blip2Processor.from_pretrained(model_name) def preprocess( @@ -60,7 +62,7 @@ def get_text_embeddings(self, texts: list[str], batch_size: int = 32): return_tensors="pt", ).to(self.device) text_outputs = self.model.forward_text(text_tokens) - # text_outputs = normalize(self.model.text_proj(text_outputs)) + #text_outputs = normalize(self.model.text_proj(text_outputs)) all_text_embeddings.append(text_outputs.cpu()) all_text_embeddings = torch.cat(all_text_embeddings, dim=0) @@ -81,7 +83,7 @@ def get_image_embeddings( inputs["pixel_values"].to(self.device) ) image_outputs = image_outputs[0][:, 0, :] - # image_outputs = normalize(self.model.vision_proj(image_outputs), dim=-1) + #image_outputs = normalize(self.model.vision_proj(image_outputs), dim=-1) all_image_embeddings.append(image_outputs.cpu()) else: with torch.no_grad(): @@ -91,10 +93,8 @@ def get_image_embeddings( images=batch_images, return_tensors="pt", padding=True )["pixel_values"].to(self.device) image_outputs = self.model.forward_image(inputs) - image_outputs = image_outputs[0] - image_outputs = normalize( - self.model.vision_proj(image_outputs[:, 0, :]), dim=-1 - ) + image_outputs = image_outputs[0][:, 0, :] + #image_outputs = normalize(self.model.vision_proj(image_outputs), dim=-1) 
all_image_embeddings.append(image_outputs.cpu()) all_image_embeddings = torch.cat(all_image_embeddings, dim=0) @@ -105,6 +105,11 @@ def get_multimodal_embeddings(self, texts, images, batch_size=32): with torch.no_grad(): if isinstance(images, DataLoader): + # check dataloader batch size is the same as batch size + if images.batch_size != batch_size: + raise ValueError( + "Image DataLoader batch size must be the same as the given batch size: " + str(batch_size) + ) for batch_images, i in tqdm( zip(images, range(0, len(texts), batch_size)) ): @@ -117,6 +122,8 @@ def get_multimodal_embeddings(self, texts, images, batch_size=32): {"text_input": batch_texts, "image": image_inputs} ).multimodal_embeds[:, 0, :] + #multimodal_outputs = normalize(self.model.text_proj(multimodal_outputs), dim=-1) + all_multimodal_embeddings.append(multimodal_outputs.cpu()) else: for i in tqdm(range(0, len(texts), batch_size)): @@ -130,6 +137,8 @@ def get_multimodal_embeddings(self, texts, images, batch_size=32): {"text_input": batch_texts, "image": image_inputs} ).multimodal_embeds[:, 0, :] + #multimodal_outputs = normalize(self.model.text_proj(multimodal_outputs), dim=-1) + all_multimodal_embeddings.append(multimodal_outputs.cpu()) return torch.cat(all_multimodal_embeddings, dim=0) @@ -172,7 +181,7 @@ def get_fused_embeddings( ) if fusion_mode == "sum": fused_embeddings = text_embeddings + image_embeddings - if fusion_mode == "multimodal": + elif fusion_mode == "multimodal": fused_embeddings = self.get_multimodal_embeddings( texts, images, batch_size ) @@ -235,13 +244,21 @@ def get_fused_embeddings( cat_img = Image.open("cat.jpg") cat_text = "An image of a cat" - multi_cat_emb = mdl.get_multimodal_embeddings([cat_text], [cat_img]) + multi_cat_emb = mdl.get_fused_embeddings(["A photo of an animal"], [cat_img], fusion_mode="multimodal") + multi_conflicting_emb = mdl.get_fused_embeddings(["A photo of a dog"], [cat_img], fusion_mode="multimodal") + image_cat_emb = 
mdl.get_image_embeddings([cat_img]) text_cat_emb = mdl.get_text_embeddings(["An photo of a cat"]) text_dog_emb = mdl.get_text_embeddings(["An image of a dog"]) print(multi_cat_emb.shape) - sim1 = torch.nn.functional.cosine_similarity(multi_cat_emb, text_cat_emb) - sim2 = torch.nn.functional.cosine_similarity(multi_cat_emb, text_dog_emb) + sim1 = torch.nn.functional.cosine_similarity(image_cat_emb, text_cat_emb) + sim2 = torch.nn.functional.cosine_similarity(image_cat_emb, text_dog_emb) + sim3 = torch.nn.functional.cosine_similarity(multi_cat_emb, text_cat_emb) + sim4 = torch.nn.functional.cosine_similarity(multi_cat_emb, text_dog_emb) + sim5 = torch.nn.functional.cosine_similarity(multi_conflicting_emb, text_cat_emb) + print(sim1, sim2) + + print(sim3, sim4, sim5) From 57bc3b8d4e98e4d29c2e44fb12e1bdce1c263cc1 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 19 Sep 2024 16:06:02 +0100 Subject: [PATCH 11/73] tidy blip2 --- mteb/models/blip2_models.py | 2 +- mteb/tasks/Image/Clustering/__init__.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index 9cac90d6f0..2195f42c12 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -158,7 +158,7 @@ def get_fused_embeddings( self, texts: list[str] = None, images: list[Image.Image] | DataLoader = None, - fusion_mode="multimodal", + fusion_mode="sum", batch_size: int = 32, ): # TODO: find out if BLIP has a prescribed way of fusing text and image embeddings diff --git a/mteb/tasks/Image/Clustering/__init__.py b/mteb/tasks/Image/Clustering/__init__.py index fd9a71ec19..9ce1b567e6 100644 --- a/mteb/tasks/Image/Clustering/__init__.py +++ b/mteb/tasks/Image/Clustering/__init__.py @@ -2,3 +2,4 @@ from .eng.CIFAR import * from .eng.TinyImageNet import * +from .eng.ImageNet import * From 4cbec1bc6957fcc74f35888191ee2cf620bdaa4c Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 19 Sep 2024 16:09:32 +0100 Subject: [PATCH 
12/73] add imagenet-dog-15 dataset --- mteb/tasks/Image/Clustering/eng/ImageNet.py | 71 +++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 mteb/tasks/Image/Clustering/eng/ImageNet.py diff --git a/mteb/tasks/Image/Clustering/eng/ImageNet.py b/mteb/tasks/Image/Clustering/eng/ImageNet.py new file mode 100644 index 0000000000..0efe69f844 --- /dev/null +++ b/mteb/tasks/Image/Clustering/eng/ImageNet.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import io +import PIL.Image as Image +from mteb.abstasks.Image.AbsTaskImageClustering import AbsTaskImageClustering +from mteb.abstasks.TaskMetadata import TaskMetadata + +""" +Classes: +1.MALTESE DOG +2.BLENHEIM SPANIEL +3.BASSET +4.NORWEGIAN ELKHOUND +5.GIANT SCHNAUZER +6.GOLDEN RETRIEVER +7.BRITTANY SPANIEL +8.CLUMBER +9.WELSH SPRINGER SPANIEL +10.GROENENDAEL +11.KELPIE +12.SHETLAND SHEEPDOG +13.DOBERMAN +14.PUG +15.CHOW +""" + +class ImageNetDog15Clustering(AbsTaskImageClustering): + metadata = TaskMetadata( + name="ImageNetDog15Clustering", + description="Clustering images from a 15-class dogs-only subset of the dog classes in ImageNet.", + reference="http://vision.stanford.edu/aditya86/ImageNetDogs/main.html", + dataset={ + "path": "JamieSJS/imagenet-dog-15", + "revision": "bfb6ad3b2109d26c9daddf14f98d315daa35ee72", + }, + type="Clustering", + category="i2t", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=( + "2009-06-20", + "2009-06-20" + ), # Conference date + domains=["Web"], + task_subtypes=["Object recognition"], + license="Not specified", + socioeconomic_status="mixed", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation=""" @INPROCEEDINGS{5206848, + author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Kai Li and Li Fei-Fei}, + booktitle={2009 IEEE Conference on Computer Vision and Pattern Recognition}, + title={ImageNet: A large-scale hierarchical image database}, 
+ year={2009}, + volume={}, + number={}, + pages={248-255}, + keywords={Large-scale systems;Image databases;Explosions;Internet;Robustness;Information retrieval;Image retrieval;Multimedia databases;Ontologies;Spine}, + doi={10.1109/CVPR.2009.5206848}} + """, + descriptive_stats={ + "n_samples": {"test": 1076, "train":1500}, + #"avg_character_length": {"test": 431.4}, + }, + ) + + + From 35be38d82a6b2f43245e7094d0b62b037ceb13e6 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 19 Sep 2024 16:26:57 +0100 Subject: [PATCH 13/73] tidy and lint --- .../evaluators/Image/VisualSTSEvaluator.py | 6 ++-- mteb/models/blip2_models.py | 32 +++++++++--------- mteb/tasks/Image/Clustering/__init__.py | 2 +- mteb/tasks/Image/Clustering/eng/ImageNet.py | 33 ++----------------- mteb/tasks/Image/VisualSTS/__init__.py | 2 ++ .../Image/VisualSTS/en/STS12VisualSTS.py | 2 +- .../Image/VisualSTS/en/STS13VisualSTS.py | 2 +- .../Image/VisualSTS/en/STS14VisualSTS.py | 2 +- .../Image/VisualSTS/en/STS15VisualSTS.py | 2 +- .../Image/VisualSTS/en/STS16VisualSTS.py | 2 +- 10 files changed, 31 insertions(+), 54 deletions(-) diff --git a/mteb/evaluation/evaluators/Image/VisualSTSEvaluator.py b/mteb/evaluation/evaluators/Image/VisualSTSEvaluator.py index d47e060e75..a442eb6a9a 100644 --- a/mteb/evaluation/evaluators/Image/VisualSTSEvaluator.py +++ b/mteb/evaluation/evaluators/Image/VisualSTSEvaluator.py @@ -1,18 +1,18 @@ from __future__ import annotations import logging -from typing import Any +import math import os +from typing import Any import numpy as np +import torch from scipy.stats import pearsonr, spearmanr from sklearn.metrics.pairwise import ( paired_cosine_distances, paired_euclidean_distances, paired_manhattan_distances, ) -import math -import torch from torch.utils.data import DataLoader from torchvision import transforms diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index 2195f42c12..aa92452ba9 100644 --- a/mteb/models/blip2_models.py +++ 
b/mteb/models/blip2_models.py @@ -5,7 +5,6 @@ import torch from PIL import Image -from torch.nn.functional import normalize from torch.utils.data import DataLoader from tqdm import tqdm from transformers import Blip2Processor @@ -36,7 +35,9 @@ def __init__( model_type = "coco" if "coco" in model_name else "pretrain" self.model = Blip2ITM.from_pretrained(model_type).to(self.device).float() # print numbr of parameters - print(f"Number of parameters: {sum(p.numel() for p in self.model.parameters())}") + print( + f"Number of parameters: {sum(p.numel() for p in self.model.parameters())}" + ) self.processor = Blip2Processor.from_pretrained(model_name) def preprocess( @@ -62,7 +63,7 @@ def get_text_embeddings(self, texts: list[str], batch_size: int = 32): return_tensors="pt", ).to(self.device) text_outputs = self.model.forward_text(text_tokens) - #text_outputs = normalize(self.model.text_proj(text_outputs)) + # text_outputs = normalize(self.model.text_proj(text_outputs)) all_text_embeddings.append(text_outputs.cpu()) all_text_embeddings = torch.cat(all_text_embeddings, dim=0) @@ -83,7 +84,7 @@ def get_image_embeddings( inputs["pixel_values"].to(self.device) ) image_outputs = image_outputs[0][:, 0, :] - #image_outputs = normalize(self.model.vision_proj(image_outputs), dim=-1) + # image_outputs = normalize(self.model.vision_proj(image_outputs), dim=-1) all_image_embeddings.append(image_outputs.cpu()) else: with torch.no_grad(): @@ -94,7 +95,7 @@ def get_image_embeddings( )["pixel_values"].to(self.device) image_outputs = self.model.forward_image(inputs) image_outputs = image_outputs[0][:, 0, :] - #image_outputs = normalize(self.model.vision_proj(image_outputs), dim=-1) + # image_outputs = normalize(self.model.vision_proj(image_outputs), dim=-1) all_image_embeddings.append(image_outputs.cpu()) all_image_embeddings = torch.cat(all_image_embeddings, dim=0) @@ -108,7 +109,8 @@ def get_multimodal_embeddings(self, texts, images, batch_size=32): # check dataloader batch size is the 
same as batch size if images.batch_size != batch_size: raise ValueError( - "Image DataLoader batch size must be the same as the given batch size: " + str(batch_size) + "Image DataLoader batch size must be the same as the given batch size: " + + str(batch_size) ) for batch_images, i in tqdm( zip(images, range(0, len(texts), batch_size)) @@ -122,7 +124,7 @@ def get_multimodal_embeddings(self, texts, images, batch_size=32): {"text_input": batch_texts, "image": image_inputs} ).multimodal_embeds[:, 0, :] - #multimodal_outputs = normalize(self.model.text_proj(multimodal_outputs), dim=-1) + # multimodal_outputs = normalize(self.model.text_proj(multimodal_outputs), dim=-1) all_multimodal_embeddings.append(multimodal_outputs.cpu()) else: @@ -137,7 +139,7 @@ def get_multimodal_embeddings(self, texts, images, batch_size=32): {"text_input": batch_texts, "image": image_inputs} ).multimodal_embeds[:, 0, :] - #multimodal_outputs = normalize(self.model.text_proj(multimodal_outputs), dim=-1) + # multimodal_outputs = normalize(self.model.text_proj(multimodal_outputs), dim=-1) all_multimodal_embeddings.append(multimodal_outputs.cpu()) @@ -225,12 +227,9 @@ def get_fused_embeddings( if __name__ == "__main__": - import mteb - mdl = mteb.get_model( - blip2_opt_2_7b.name, blip2_opt_2_7b.revision, device="cpu" - ) + mdl = mteb.get_model(blip2_opt_2_7b.name, blip2_opt_2_7b.revision, device="cpu") emb = mdl.get_text_embeddings(["Hello, world!"]) emb2 = mdl.get_text_embeddings(["Hello there, world!"]) emb3 = mdl.get_text_embeddings(["Goodbye, person!"]) @@ -244,8 +243,12 @@ def get_fused_embeddings( cat_img = Image.open("cat.jpg") cat_text = "An image of a cat" - multi_cat_emb = mdl.get_fused_embeddings(["A photo of an animal"], [cat_img], fusion_mode="multimodal") - multi_conflicting_emb = mdl.get_fused_embeddings(["A photo of a dog"], [cat_img], fusion_mode="multimodal") + multi_cat_emb = mdl.get_fused_embeddings( + ["A photo of an animal"], [cat_img], fusion_mode="multimodal" + ) + 
multi_conflicting_emb = mdl.get_fused_embeddings( + ["A photo of a dog"], [cat_img], fusion_mode="multimodal" + ) image_cat_emb = mdl.get_image_embeddings([cat_img]) text_cat_emb = mdl.get_text_embeddings(["An photo of a cat"]) text_dog_emb = mdl.get_text_embeddings(["An image of a dog"]) @@ -258,7 +261,6 @@ def get_fused_embeddings( sim4 = torch.nn.functional.cosine_similarity(multi_cat_emb, text_dog_emb) sim5 = torch.nn.functional.cosine_similarity(multi_conflicting_emb, text_cat_emb) - print(sim1, sim2) print(sim3, sim4, sim5) diff --git a/mteb/tasks/Image/Clustering/__init__.py b/mteb/tasks/Image/Clustering/__init__.py index 9ce1b567e6..804870ebeb 100644 --- a/mteb/tasks/Image/Clustering/__init__.py +++ b/mteb/tasks/Image/Clustering/__init__.py @@ -1,5 +1,5 @@ from __future__ import annotations from .eng.CIFAR import * -from .eng.TinyImageNet import * from .eng.ImageNet import * +from .eng.TinyImageNet import * diff --git a/mteb/tasks/Image/Clustering/eng/ImageNet.py b/mteb/tasks/Image/Clustering/eng/ImageNet.py index 0efe69f844..b45956cfe7 100644 --- a/mteb/tasks/Image/Clustering/eng/ImageNet.py +++ b/mteb/tasks/Image/Clustering/eng/ImageNet.py @@ -1,29 +1,8 @@ from __future__ import annotations -import io -import PIL.Image as Image from mteb.abstasks.Image.AbsTaskImageClustering import AbsTaskImageClustering from mteb.abstasks.TaskMetadata import TaskMetadata -""" -Classes: -1.MALTESE DOG -2.BLENHEIM SPANIEL -3.BASSET -4.NORWEGIAN ELKHOUND -5.GIANT SCHNAUZER -6.GOLDEN RETRIEVER -7.BRITTANY SPANIEL -8.CLUMBER -9.WELSH SPRINGER SPANIEL -10.GROENENDAEL -11.KELPIE -12.SHETLAND SHEEPDOG -13.DOBERMAN -14.PUG -15.CHOW -""" - class ImageNetDog15Clustering(AbsTaskImageClustering): metadata = TaskMetadata( name="ImageNetDog15Clustering", @@ -38,10 +17,7 @@ class ImageNetDog15Clustering(AbsTaskImageClustering): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", - date=( - "2009-06-20", - "2009-06-20" - ), # Conference date + date=("2009-06-20", 
"2009-06-20"), # Conference date domains=["Web"], task_subtypes=["Object recognition"], license="Not specified", @@ -62,10 +38,7 @@ class ImageNetDog15Clustering(AbsTaskImageClustering): doi={10.1109/CVPR.2009.5206848}} """, descriptive_stats={ - "n_samples": {"test": 1076, "train":1500}, - #"avg_character_length": {"test": 431.4}, + "n_samples": {"test": 1076, "train": 1500}, + # "avg_character_length": {"test": 431.4}, }, ) - - - diff --git a/mteb/tasks/Image/VisualSTS/__init__.py b/mteb/tasks/Image/VisualSTS/__init__.py index cc7823118b..eb785d5d85 100644 --- a/mteb/tasks/Image/VisualSTS/__init__.py +++ b/mteb/tasks/Image/VisualSTS/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from .en.STS12VisualSTS import * from .en.STS13VisualSTS import * from .en.STS14VisualSTS import * diff --git a/mteb/tasks/Image/VisualSTS/en/STS12VisualSTS.py b/mteb/tasks/Image/VisualSTS/en/STS12VisualSTS.py index 1f88b8045a..8d78bb7238 100644 --- a/mteb/tasks/Image/VisualSTS/en/STS12VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/en/STS12VisualSTS.py @@ -1,7 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata from mteb.abstasks.Image.AbsTaskVisualSTS import AbsTaskVisualSTS +from mteb.abstasks.TaskMetadata import TaskMetadata class STS12VisualSTS(AbsTaskVisualSTS): diff --git a/mteb/tasks/Image/VisualSTS/en/STS13VisualSTS.py b/mteb/tasks/Image/VisualSTS/en/STS13VisualSTS.py index 122a5d6d30..1b02248d35 100644 --- a/mteb/tasks/Image/VisualSTS/en/STS13VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/en/STS13VisualSTS.py @@ -1,7 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata from mteb.abstasks.Image.AbsTaskVisualSTS import AbsTaskVisualSTS +from mteb.abstasks.TaskMetadata import TaskMetadata class STS13VisualSTS(AbsTaskVisualSTS): diff --git a/mteb/tasks/Image/VisualSTS/en/STS14VisualSTS.py b/mteb/tasks/Image/VisualSTS/en/STS14VisualSTS.py index cbbcc94445..a427fdae0b 100644 --- 
a/mteb/tasks/Image/VisualSTS/en/STS14VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/en/STS14VisualSTS.py @@ -1,7 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata from mteb.abstasks.Image.AbsTaskVisualSTS import AbsTaskVisualSTS +from mteb.abstasks.TaskMetadata import TaskMetadata class STS14VisualSTS(AbsTaskVisualSTS): diff --git a/mteb/tasks/Image/VisualSTS/en/STS15VisualSTS.py b/mteb/tasks/Image/VisualSTS/en/STS15VisualSTS.py index 9eb99af506..12c9a74c81 100644 --- a/mteb/tasks/Image/VisualSTS/en/STS15VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/en/STS15VisualSTS.py @@ -1,7 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata from mteb.abstasks.Image.AbsTaskVisualSTS import AbsTaskVisualSTS +from mteb.abstasks.TaskMetadata import TaskMetadata class STS15VisualSTS(AbsTaskVisualSTS): diff --git a/mteb/tasks/Image/VisualSTS/en/STS16VisualSTS.py b/mteb/tasks/Image/VisualSTS/en/STS16VisualSTS.py index 7db7b4f906..ae1e2900dd 100644 --- a/mteb/tasks/Image/VisualSTS/en/STS16VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/en/STS16VisualSTS.py @@ -1,7 +1,7 @@ from __future__ import annotations -from mteb.abstasks.TaskMetadata import TaskMetadata from mteb.abstasks.Image.AbsTaskVisualSTS import AbsTaskVisualSTS +from mteb.abstasks.TaskMetadata import TaskMetadata class STS16VisualSTS(AbsTaskVisualSTS): From 83d0f455d75dc904db7c83740f09277e62ad28e3 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 19 Sep 2024 16:37:43 +0100 Subject: [PATCH 14/73] remove unused import --- mteb/models/blip2_models.py | 1 - mteb/tasks/Image/Clustering/eng/ImageNet.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index aa92452ba9..cb289b3f96 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -14,7 +14,6 @@ def blip2_loader(**kwargs): try: # a temporal fix for the dependency issues of vista models. 
- from lavis.models import load_model_and_preprocess from lavis.models.blip2_models.blip2_image_text_matching import ( Blip2ITM, ) diff --git a/mteb/tasks/Image/Clustering/eng/ImageNet.py b/mteb/tasks/Image/Clustering/eng/ImageNet.py index b45956cfe7..1259808450 100644 --- a/mteb/tasks/Image/Clustering/eng/ImageNet.py +++ b/mteb/tasks/Image/Clustering/eng/ImageNet.py @@ -3,6 +3,7 @@ from mteb.abstasks.Image.AbsTaskImageClustering import AbsTaskImageClustering from mteb.abstasks.TaskMetadata import TaskMetadata + class ImageNetDog15Clustering(AbsTaskImageClustering): metadata = TaskMetadata( name="ImageNetDog15Clustering", From a309de512954b958ac005869c33534ec39985497 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 19 Sep 2024 21:17:45 +0100 Subject: [PATCH 15/73] add cluster_accuracy, ari and nmi to Image.ClusteringEvaluator --- .../evaluators/Image/ClusteringEvaluator.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/mteb/evaluation/evaluators/Image/ClusteringEvaluator.py b/mteb/evaluation/evaluators/Image/ClusteringEvaluator.py index b006470416..31b5c26f1a 100644 --- a/mteb/evaluation/evaluators/Image/ClusteringEvaluator.py +++ b/mteb/evaluation/evaluators/Image/ClusteringEvaluator.py @@ -5,8 +5,10 @@ import sklearn import sklearn.cluster +import numpy as np from PIL import Image from sklearn import metrics +from scipy.optimize import linear_sum_assignment from mteb.encoder_interface import Encoder from mteb.evaluation.evaluators.Evaluator import Evaluator @@ -53,6 +55,16 @@ def __call__(self, model: Encoder, *, encode_kwargs: dict[str, Any] = {}): logger.info("Evaluating...") v_measure = metrics.cluster.v_measure_score(self.labels, cluster_assignment) + nmi = metrics.cluster.normalized_mutual_info_score(self.labels, cluster_assignment) + ari = metrics.cluster.adjusted_rand_score(self.labels, cluster_assignment) + accuracy = metrics.accuracy_score(self.labels, cluster_assignment) + + matrix = 
metrics.confusion_matrix(self.labels, cluster_assignment) + + # get linear sum assignment + row_ind, col_ind = linear_sum_assignment(matrix, maximize=True) + total_correct = matrix[row_ind, col_ind].sum() + clustering_accuracy = total_correct / len(self.labels) - return {"v_measure": v_measure, "accuracy": accuracy} + return {"v_measure": v_measure, "accuracy": accuracy, "nmi": nmi, "ari": ari, "cluster_accuracy": clustering_accuracy} From 02c1f81b85e1dda106dace662af3905096ca1c31 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 20 Sep 2024 12:33:26 +0100 Subject: [PATCH 16/73] add imagenet-10 clustering task --- mteb/tasks/Image/Clustering/eng/ImageNet.py | 40 +++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/mteb/tasks/Image/Clustering/eng/ImageNet.py b/mteb/tasks/Image/Clustering/eng/ImageNet.py index 1259808450..dd02d8e830 100644 --- a/mteb/tasks/Image/Clustering/eng/ImageNet.py +++ b/mteb/tasks/Image/Clustering/eng/ImageNet.py @@ -43,3 +43,43 @@ class ImageNetDog15Clustering(AbsTaskImageClustering): # "avg_character_length": {"test": 431.4}, }, ) + +class ImageNet10Clustering(AbsTaskImageClustering): + metadata = TaskMetadata( + name="ImageNet10Clustering", + description="Clustering images from an 10-class subset of ImageNet which are generally easy to distinguish.", + reference="https://www.kaggle.com/datasets/liusha249/imagenet10", + dataset={ + "path": "JamieSJS/imagenet-10", + "revision": "88f8a6d47c257895094c5ad81e67ba751771fc99", + }, + type="Clustering", + category="i2t", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2009-06-20", "2009-06-20"), # Conference date + domains=["Web"], + task_subtypes=["Object recognition"], + license="Not specified", + socioeconomic_status="mixed", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation=""" @INPROCEEDINGS{5206848, + author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia 
and Kai Li and Li Fei-Fei}, + booktitle={2009 IEEE Conference on Computer Vision and Pattern Recognition}, + title={ImageNet: A large-scale hierarchical image database}, + year={2009}, + volume={}, + number={}, + pages={248-255}, + keywords={Large-scale systems;Image databases;Explosions;Internet;Robustness;Information retrieval;Image retrieval;Multimedia databases;Ontologies;Spine}, + doi={10.1109/CVPR.2009.5206848}} + """, + descriptive_stats={ + "n_samples": {"test": 13000}, + # "avg_character_length": {"test": 431.4}, + }, + ) From cede108a8bbce91bd345e29b3fb6bd31ffa06b51 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 20 Sep 2024 14:02:40 +0100 Subject: [PATCH 17/73] add SOPI2IRetrieval --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 1 + .../Any2AnyRetrieval/eng/SOPI2IRetrieval.py | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 2cda0b0664..7ce85ca362 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -19,6 +19,7 @@ from .eng.OVENIT2TRetrieval import * from .eng.SciMMIRI2TRetrieval import * from .eng.SciMMIRT2IRetrieval import * +from .eng.SOPI2IRetrieval import * from .eng.StanfordCarsI2IRetrieval import * from .eng.TUBerlinT2IRetrieval import * from .eng.VisualNewsI2TRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py new file mode 100644 index 0000000000..299a8547db --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class SOPI2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = 
TaskMetadata( + name="SOPI2IRetrieval", + description="Retrieve product photos of 22634 online products.", + reference="https://www.florian-schroff.de/publications/CUB-200.pdf", + dataset={ + "path": "JamieSJS/stanford-online-products", + "revision": "34e35aeeb47a240e9a60609efa21dd00d6e2478f", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="cv_recall_at_1", + date=("2009-01-01", "2010-04-01"), + domains=["Encyclopaedic"], + task_subtypes=["Object recognition"], + license="Not specified", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@inproceedings{oh2016deep, + title={Deep metric learning via lifted structured feature embedding}, + author={Oh Song, Hyun and Xiang, Yu and Jegelka, Stefanie and Savarese, Silvio}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4004--4012}, + year={2016} +} + """, + descriptive_stats={ + "n_samples": {"default": 120053}, + }, + ) + skip_first_result = True From d226748b698d8ab7513fabf917caa8ade0e3f752 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 20 Sep 2024 14:05:01 +0100 Subject: [PATCH 18/73] add results forclip on ImageNet10Clustering and ImageNetDog15Clustering --- .../evaluators/Image/ClusteringEvaluator.py | 19 ++++++++++----- mteb/tasks/Image/Clustering/eng/ImageNet.py | 1 + .../ImageNet10Clustering.json | 23 +++++++++++++++++++ .../ImageNetDog15Clustering.json | 23 +++++++++++++++++++ 4 files changed, 60 insertions(+), 6 deletions(-) create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageNet10Clustering.json create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageNetDog15Clustering.json diff --git a/mteb/evaluation/evaluators/Image/ClusteringEvaluator.py b/mteb/evaluation/evaluators/Image/ClusteringEvaluator.py index 
31b5c26f1a..f53befe8ef 100644 --- a/mteb/evaluation/evaluators/Image/ClusteringEvaluator.py +++ b/mteb/evaluation/evaluators/Image/ClusteringEvaluator.py @@ -5,10 +5,9 @@ import sklearn import sklearn.cluster -import numpy as np from PIL import Image -from sklearn import metrics from scipy.optimize import linear_sum_assignment +from sklearn import metrics from mteb.encoder_interface import Encoder from mteb.evaluation.evaluators.Evaluator import Evaluator @@ -55,16 +54,24 @@ def __call__(self, model: Encoder, *, encode_kwargs: dict[str, Any] = {}): logger.info("Evaluating...") v_measure = metrics.cluster.v_measure_score(self.labels, cluster_assignment) - nmi = metrics.cluster.normalized_mutual_info_score(self.labels, cluster_assignment) + nmi = metrics.cluster.normalized_mutual_info_score( + self.labels, cluster_assignment + ) ari = metrics.cluster.adjusted_rand_score(self.labels, cluster_assignment) accuracy = metrics.accuracy_score(self.labels, cluster_assignment) - + matrix = metrics.confusion_matrix(self.labels, cluster_assignment) - + # get linear sum assignment row_ind, col_ind = linear_sum_assignment(matrix, maximize=True) total_correct = matrix[row_ind, col_ind].sum() clustering_accuracy = total_correct / len(self.labels) - return {"v_measure": v_measure, "accuracy": accuracy, "nmi": nmi, "ari": ari, "cluster_accuracy": clustering_accuracy} + return { + "v_measure": v_measure, + "accuracy": accuracy, + "nmi": nmi, + "ari": ari, + "cluster_accuracy": clustering_accuracy, + } diff --git a/mteb/tasks/Image/Clustering/eng/ImageNet.py b/mteb/tasks/Image/Clustering/eng/ImageNet.py index dd02d8e830..dcf8587322 100644 --- a/mteb/tasks/Image/Clustering/eng/ImageNet.py +++ b/mteb/tasks/Image/Clustering/eng/ImageNet.py @@ -44,6 +44,7 @@ class ImageNetDog15Clustering(AbsTaskImageClustering): }, ) + class ImageNet10Clustering(AbsTaskImageClustering): metadata = TaskMetadata( name="ImageNet10Clustering", diff --git 
a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageNet10Clustering.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageNet10Clustering.json new file mode 100644 index 0000000000..d502635992 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageNet10Clustering.json @@ -0,0 +1,23 @@ +{ + "dataset_revision": "88f8a6d47c257895094c5ad81e67ba751771fc99", + "evaluation_time": 33.32936453819275, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.1993076923076923, + "ari": 0.9672782515730578, + "cluster_accuracy": 0.985, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.1993076923076923, + "nmi": 0.9644473066207006, + "v_measure": 0.9644473066207006 + } + ] + }, + "task_name": "ImageNet10Clustering" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageNetDog15Clustering.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageNetDog15Clustering.json new file mode 100644 index 0000000000..fe53c8ed7e --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageNetDog15Clustering.json @@ -0,0 +1,23 @@ +{ + "dataset_revision": "bfb6ad3b2109d26c9daddf14f98d315daa35ee72", + "evaluation_time": 4.18316650390625, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.026022304832713755, + "ari": 0.36465670607270784, + "cluster_accuracy": 0.4656133828996282, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.026022304832713755, + "nmi": 0.5160500208664386, + "v_measure": 0.5160500208664386 + } + ] + }, + "task_name": "ImageNetDog15Clustering" +} \ No newline at end of file From c7c68939eebda07ed279c59df808366f8a8b2ac3 Mon Sep 17 
00:00:00 2001 From: Jamie-Stirling Date: Fri, 20 Sep 2024 14:26:08 +0100 Subject: [PATCH 19/73] add SOPI2IRetrieval results for clip 32 --- .../SOPI2IRetrieval.json | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json new file mode 100644 index 0000000000..0ffb779a00 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "34e35aeeb47a240e9a60609efa21dd00d6e2478f", + "evaluation_time": 523.904242515564, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.50296, + "cv_recall_at_10": 0.67867, + "cv_recall_at_100": 0.80207, + "cv_recall_at_1000": 0.90797, + "cv_recall_at_20": 0.71918, + "cv_recall_at_3": 0.59883, + "cv_recall_at_5": 0.63223, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.50296, + "map_at_1": 0.01743, + "map_at_10": 0.18702, + "map_at_100": 0.19992, + "map_at_1000": 0.20163, + "map_at_20": 0.19415, + "map_at_3": 0.13926, + "map_at_5": 0.16856, + "mrr_at_1": 0.020809401784925335, + "mrr_at_10": 0.30757755224265354, + "mrr_at_100": 0.3127412453176731, + "mrr_at_1000": 0.3131742235270453, + "mrr_at_20": 0.3106655733183114, + "mrr_at_3": 0.28876174486759937, + "mrr_at_5": 0.30027318782951246, + "nauc_cv_recall_at_1000_diff1": -0.16859046840753472, + "nauc_cv_recall_at_1000_max": 0.025825747180792717, + "nauc_cv_recall_at_1000_std": 0.10093428974009021, + "nauc_cv_recall_at_100_diff1": -0.21029734576768694, + "nauc_cv_recall_at_100_max": 0.014647522560512015, + "nauc_cv_recall_at_100_std": 0.0507444053462594, + 
"nauc_cv_recall_at_10_diff1": -0.27159676443421377, + "nauc_cv_recall_at_10_max": 0.005491087955239775, + "nauc_cv_recall_at_10_std": 0.02474251556064112, + "nauc_cv_recall_at_1_diff1": -0.2932928014798219, + "nauc_cv_recall_at_1_max": -0.002633536711876493, + "nauc_cv_recall_at_1_std": -0.008456158086326292, + "nauc_cv_recall_at_20_diff1": -0.25166875961614193, + "nauc_cv_recall_at_20_max": 0.006747419978420269, + "nauc_cv_recall_at_20_std": 0.035987294094302555, + "nauc_cv_recall_at_3_diff1": -0.29779984100144863, + "nauc_cv_recall_at_3_max": -0.0035652648610872604, + "nauc_cv_recall_at_3_std": 0.014056845458600668, + "nauc_cv_recall_at_5_diff1": -0.2888857057827477, + "nauc_cv_recall_at_5_max": 7.1464099594732e-05, + "nauc_cv_recall_at_5_std": 0.01823034964853647, + "nauc_map_at_1000_diff1": -0.2714835648506667, + "nauc_map_at_1000_max": 0.0048783333927581995, + "nauc_map_at_1000_std": 0.10933153351257194, + "nauc_map_at_100_diff1": -0.27106477649697597, + "nauc_map_at_100_max": 0.004961279421023488, + "nauc_map_at_100_std": 0.10869233131948694, + "nauc_map_at_10_diff1": -0.27101224264468865, + "nauc_map_at_10_max": 0.003129881807984945, + "nauc_map_at_10_std": 0.09970024568152357, + "nauc_map_at_1_diff1": -0.5746988023835725, + "nauc_map_at_1_max": 0.009513570955681795, + "nauc_map_at_1_std": 0.24050607736711668, + "nauc_map_at_20_diff1": -0.27103409286809227, + "nauc_map_at_20_max": 0.004505986343978161, + "nauc_map_at_20_std": 0.10422239017835391, + "nauc_map_at_3_diff1": -0.25758853794935105, + "nauc_map_at_3_max": 0.0003165186809392022, + "nauc_map_at_3_std": 0.073787815755662, + "nauc_map_at_5_diff1": -0.26840871915761827, + "nauc_map_at_5_max": 0.002542520569357188, + "nauc_map_at_5_std": 0.0864360224226272, + "nauc_mrr_at_1000_diff1": -0.352973203440373, + "nauc_mrr_at_1000_max": 0.004279169460655329, + "nauc_mrr_at_1000_std": 0.05010168406176362, + "nauc_mrr_at_100_diff1": -0.35282838896266405, + "nauc_mrr_at_100_max": 0.004246244348126371, + 
"nauc_mrr_at_100_std": 0.0500103093583224, + "nauc_mrr_at_10_diff1": -0.3544263028903223, + "nauc_mrr_at_10_max": 0.003929367832392052, + "nauc_mrr_at_10_std": 0.0478252958551306, + "nauc_mrr_at_1_diff1": -0.5746988023835726, + "nauc_mrr_at_1_max": 0.029047376939523493, + "nauc_mrr_at_1_std": 0.2870259521668249, + "nauc_mrr_at_20_diff1": -0.35339280008168317, + "nauc_mrr_at_20_max": 0.004175461034327089, + "nauc_mrr_at_20_std": 0.049148248185835845, + "nauc_mrr_at_3_diff1": -0.3591723252785509, + "nauc_mrr_at_3_max": 0.00403104720219049, + "nauc_mrr_at_3_std": 0.0404960811268073, + "nauc_mrr_at_5_diff1": -0.3585645562893135, + "nauc_mrr_at_5_max": 0.0020548539639942804, + "nauc_mrr_at_5_std": 0.045522727902755446, + "nauc_ndcg_at_1000_diff1": -0.2685009406444735, + "nauc_ndcg_at_1000_max": 0.005836111353149481, + "nauc_ndcg_at_1000_std": 0.11311294743168826, + "nauc_ndcg_at_100_diff1": -0.2626615425913708, + "nauc_ndcg_at_100_max": 0.006500040666350577, + "nauc_ndcg_at_100_std": 0.10412717822836703, + "nauc_ndcg_at_10_diff1": -0.26986440689503255, + "nauc_ndcg_at_10_max": 0.002259064902287912, + "nauc_ndcg_at_10_std": 0.08351960465910635, + "nauc_ndcg_at_1_diff1": -0.5746988023835727, + "nauc_ndcg_at_1_max": 0.01860068455415359, + "nauc_ndcg_at_1_std": 0.24591999227187353, + "nauc_ndcg_at_20_diff1": -0.26557544406085987, + "nauc_ndcg_at_20_max": 0.0047811746525403785, + "nauc_ndcg_at_20_std": 0.09053231502866856, + "nauc_ndcg_at_3_diff1": -0.3079122639483788, + "nauc_ndcg_at_3_max": 0.001817933912863451, + "nauc_ndcg_at_3_std": 0.05331981166099078, + "nauc_ndcg_at_5_diff1": -0.29161633386458363, + "nauc_ndcg_at_5_max": 0.00323633286273378, + "nauc_ndcg_at_5_std": 0.07076540546903416, + "nauc_precision_at_1000_diff1": -0.13140300790068282, + "nauc_precision_at_1000_max": 0.005922440105917975, + "nauc_precision_at_1000_std": 0.08342438590882045, + "nauc_precision_at_100_diff1": -0.17052567611996527, + "nauc_precision_at_100_max": 0.011026201381103823, + 
"nauc_precision_at_100_std": 0.09223669236894032, + "nauc_precision_at_10_diff1": -0.23209493838034498, + "nauc_precision_at_10_max": 0.004908243372756972, + "nauc_precision_at_10_std": 0.07007542762563608, + "nauc_precision_at_1_diff1": -0.5746988023835727, + "nauc_precision_at_1_max": 0.01860068455415359, + "nauc_precision_at_1_std": 0.24591999227187353, + "nauc_precision_at_20_diff1": -0.2109568972104465, + "nauc_precision_at_20_max": 0.007714408837495573, + "nauc_precision_at_20_std": 0.07965290786741164, + "nauc_precision_at_3_diff1": -0.2870525654193057, + "nauc_precision_at_3_max": -0.0007584139080774421, + "nauc_precision_at_3_std": 0.0318750842480405, + "nauc_precision_at_5_diff1": -0.25948022131281406, + "nauc_precision_at_5_max": 0.004692362608818381, + "nauc_precision_at_5_std": 0.04879698010109281, + "nauc_recall_at_1000_diff1": -0.12358858307301535, + "nauc_recall_at_1000_max": 0.009306080801565777, + "nauc_recall_at_1000_std": 0.15591617596817545, + "nauc_recall_at_100_diff1": -0.14715964976867443, + "nauc_recall_at_100_max": 0.01041920443322213, + "nauc_recall_at_100_std": 0.11606682272338587, + "nauc_recall_at_10_diff1": -0.18845161575089034, + "nauc_recall_at_10_max": 0.000650022994426268, + "nauc_recall_at_10_std": 0.07614315809536469, + "nauc_recall_at_1_diff1": -0.5746988023835725, + "nauc_recall_at_1_max": 0.009513570955681795, + "nauc_recall_at_1_std": 0.24050607736711668, + "nauc_recall_at_20_diff1": -0.17338195050795077, + "nauc_recall_at_20_max": 0.006581585832690798, + "nauc_recall_at_20_std": 0.08663740977783436, + "nauc_recall_at_3_diff1": -0.19307909748063756, + "nauc_recall_at_3_max": 0.0003553588908448416, + "nauc_recall_at_3_std": 0.0442289537166574, + "nauc_recall_at_5_diff1": -0.19899176174509764, + "nauc_recall_at_5_max": -0.0005095021106718202, + "nauc_recall_at_5_std": 0.060914430592624226, + "ndcg_at_1": 0.03583, + "ndcg_at_10": 0.27789, + "ndcg_at_100": 0.32166, + "ndcg_at_1000": 0.35384, + "ndcg_at_20": 0.29399, + 
"ndcg_at_3": 0.26171, + "ndcg_at_5": 0.27031, + "precision_at_1": 0.03583, + "precision_at_10": 0.13151, + "precision_at_100": 0.01932, + "precision_at_1000": 0.0027, + "precision_at_20": 0.07531, + "precision_at_3": 0.26493, + "precision_at_5": 0.21197, + "recall_at_1": 0.01743, + "recall_at_10": 0.3639, + "recall_at_100": 0.50674, + "recall_at_1000": 0.67803, + "recall_at_20": 0.40787, + "recall_at_3": 0.24952, + "recall_at_5": 0.30796 + } + ] + }, + "task_name": "SOPI2IRetrieval" +} \ No newline at end of file From 5bf66fdbd99a08f372db1f2250709171a184bafa Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 23 Sep 2024 11:53:48 +0100 Subject: [PATCH 20/73] add results for clip vit 32/SOPI2IRetrieval --- mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py | 2 +- .../SOPI2IRetrieval.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py index 299a8547db..136c2f214c 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py @@ -11,7 +11,7 @@ class SOPI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://www.florian-schroff.de/publications/CUB-200.pdf", dataset={ "path": "JamieSJS/stanford-online-products", - "revision": "34e35aeeb47a240e9a60609efa21dd00d6e2478f", + "revision": "0b3a1622902e6258425e673405bdfb1e5dfa8618", }, type="Retrieval", category="i2i", diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json index 0ffb779a00..6c038f69cf 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json @@ -1,6 +1,6 @@ 
{ - "dataset_revision": "34e35aeeb47a240e9a60609efa21dd00d6e2478f", - "evaluation_time": 523.904242515564, + "dataset_revision": "0b3a1622902e6258425e673405bdfb1e5dfa8618", + "evaluation_time": 510.43445205688477, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { From b1759dcd1d4bc2e5e2e01e4b54300f928454a76e Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 23 Sep 2024 12:18:27 +0100 Subject: [PATCH 21/73] resolve conflict --- mteb/models/blip2_models.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index 32bd366164..9f950ad525 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -13,11 +13,7 @@ def blip2_loader(**kwargs): -<<<<<<< HEAD - try: # a temporal fix for the dependency issues of vista models. -======= try: # a temporal fix for the dependency issues. ->>>>>>> 99e631f0f5b4b648d4fc4431cd3fed685452c9d3 from lavis.models.blip2_models.blip2_image_text_matching import ( Blip2ITM, ) From a8901718ac8442888de753e44d0d3ba553bb52ce Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 23 Sep 2024 15:39:42 +0100 Subject: [PATCH 22/73] add RP2kI2IRetrieval dataset --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 1 + .../Any2AnyRetrieval/eng/RP2kI2IRetrieval.py | 40 +++++++++++++++++++ .../Image/ImageClassification/__init__.py | 1 + 3 files changed, 42 insertions(+) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 7ce85ca362..2fb3db358a 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -17,6 +17,7 @@ from .eng.NIGHTSI2IRetrieval import * from .eng.OVENIT2ITRetrieval import * from .eng.OVENIT2TRetrieval import * +from .eng.RP2kI2IRetrieval import * from .eng.SciMMIRI2TRetrieval import * from .eng.SciMMIRT2IRetrieval import * from .eng.SOPI2IRetrieval 
import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py new file mode 100644 index 0000000000..1335e11659 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class RP2kI2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="RP2kI2IRetrieval", + description="Retrieve photos of 39457 products.", + reference="https://arxiv.org/abs/2006.12634", + dataset={ + "path": "JamieSJS/rp2k", + "revision": "f8f82d4eb1aa4dc4dbf2c768596c8110a3703765", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="cv_recall_at_1", + date=("2009-01-01", "2010-04-01"), + domains=["Web"], + task_subtypes=["Object recognition"], + license="Not specified", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@article{peng2020rp2k, + title={RP2K: A large-scale retail product dataset for fine-grained image classification}, + author={Peng, Jingtian and Xiao, Chang and Li, Yifan}, + journal={arXiv preprint arXiv:2006.12634}, + year={2020} +} + """, + descriptive_stats={ + "n_samples": {"default": 4409419}, + }, + ) + skip_first_result = True diff --git a/mteb/tasks/Image/ImageClassification/__init__.py b/mteb/tasks/Image/ImageClassification/__init__.py index c5a82f357d..e658c625e5 100644 --- a/mteb/tasks/Image/ImageClassification/__init__.py +++ b/mteb/tasks/Image/ImageClassification/__init__.py @@ -11,6 +11,7 @@ from .eng.Food101Classification import * from .eng.GTSRBClassification import * from .eng.Imagenet1k import * +from .eng.Kinetics700Classification import * from .eng.MNISTClassification import * from .eng.OxfordFlowersClassification import * from 
.eng.OxfordPetsClassification import * From d3396fae2b8ab3c5cdef24473d67f346ea014587 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 23 Sep 2024 15:40:20 +0100 Subject: [PATCH 23/73] add RP2kI2IRetrieval results with clip-vit-base-patch32 --- .../RP2kI2IRetrieval.json | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RP2kI2IRetrieval.json diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RP2kI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RP2kI2IRetrieval.json new file mode 100644 index 0000000000..eafeff620f --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RP2kI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "f8f82d4eb1aa4dc4dbf2c768596c8110a3703765", + "evaluation_time": 324.14615392684937, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.55887, + "cv_recall_at_10": 0.77913, + "cv_recall_at_100": 0.90418, + "cv_recall_at_1000": 0.95933, + "cv_recall_at_20": 0.82554, + "cv_recall_at_3": 0.6808, + "cv_recall_at_5": 0.72537, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.55887, + "map_at_1": 0.06083, + "map_at_10": 0.12431, + "map_at_100": 0.17952, + "map_at_1000": 0.2009, + "map_at_20": 0.14322, + "map_at_3": 0.09136, + "map_at_5": 0.10532, + "mrr_at_1": 0.9998166867438328, + "mrr_at_10": 0.9998952495679044, + "mrr_at_100": 0.9998952495679044, + "mrr_at_1000": 0.9998952495679044, + "mrr_at_20": 0.9998952495679044, + "mrr_at_3": 0.9998952495679044, + "mrr_at_5": 0.9998952495679044, + "nauc_cv_recall_at_1000_diff1": -0.6534775051605857, + "nauc_cv_recall_at_1000_max": 0.016875483355911193, + "nauc_cv_recall_at_1000_std": 0.1834914123790724, + "nauc_cv_recall_at_100_diff1": 
-0.46190450882692813, + "nauc_cv_recall_at_100_max": 0.0044555049072486745, + "nauc_cv_recall_at_100_std": 0.3217134902493942, + "nauc_cv_recall_at_10_diff1": -0.3197493491364096, + "nauc_cv_recall_at_10_max": 0.004081705009985084, + "nauc_cv_recall_at_10_std": 0.3833079134677583, + "nauc_cv_recall_at_1_diff1": -0.2732166234186543, + "nauc_cv_recall_at_1_max": -0.010034973300874108, + "nauc_cv_recall_at_1_std": 0.3494947305628685, + "nauc_cv_recall_at_20_diff1": -0.3517887547295718, + "nauc_cv_recall_at_20_max": 0.006552844210252164, + "nauc_cv_recall_at_20_std": 0.3793238219190742, + "nauc_cv_recall_at_3_diff1": -0.28039644458970836, + "nauc_cv_recall_at_3_max": 0.00026932680904395343, + "nauc_cv_recall_at_3_std": 0.3777753345990772, + "nauc_cv_recall_at_5_diff1": -0.29514123944001847, + "nauc_cv_recall_at_5_max": 0.0002103622905348435, + "nauc_cv_recall_at_5_std": 0.3862948975183731, + "nauc_map_at_1000_diff1": 0.08734717040767775, + "nauc_map_at_1000_max": -0.010675299588909455, + "nauc_map_at_1000_std": 0.41490480554200815, + "nauc_map_at_100_diff1": 0.135072420725819, + "nauc_map_at_100_max": -0.00837091280878698, + "nauc_map_at_100_std": 0.35005558541793835, + "nauc_map_at_10_diff1": 0.26950863371478956, + "nauc_map_at_10_max": -0.004745789557209113, + "nauc_map_at_10_std": 0.14275927317472537, + "nauc_map_at_1_diff1": 0.40347502895694837, + "nauc_map_at_1_max": -0.006070639716117982, + "nauc_map_at_1_std": 0.01609734274601474, + "nauc_map_at_20_diff1": 0.22773030861533414, + "nauc_map_at_20_max": -0.005347115187225529, + "nauc_map_at_20_std": 0.2096685956925957, + "nauc_map_at_3_diff1": 0.3350402994842465, + "nauc_map_at_3_max": -0.002361981770631997, + "nauc_map_at_3_std": 0.05928879397905913, + "nauc_map_at_5_diff1": 0.30786980093898875, + "nauc_map_at_5_max": -0.0033167682834259605, + "nauc_map_at_5_std": 0.08902137115070434, + "nauc_mrr_at_1000_diff1": 0.7693741473921198, + "nauc_mrr_at_1000_max": -0.07329673170966303, + "nauc_mrr_at_1000_std": 
-0.41562237775388855, + "nauc_mrr_at_100_diff1": 0.7693741473921198, + "nauc_mrr_at_100_max": -0.07329673170966303, + "nauc_mrr_at_100_std": -0.41562237775388855, + "nauc_mrr_at_10_diff1": 0.7693741473921198, + "nauc_mrr_at_10_max": -0.07329673170966303, + "nauc_mrr_at_10_std": -0.41562237775388855, + "nauc_mrr_at_1_diff1": 0.7976521093473791, + "nauc_mrr_at_1_max": -0.05649010070984422, + "nauc_mrr_at_1_std": -0.43184586481189485, + "nauc_mrr_at_20_diff1": 0.7693741473921198, + "nauc_mrr_at_20_max": -0.07329673170966303, + "nauc_mrr_at_20_std": -0.41562237775388855, + "nauc_mrr_at_3_diff1": 0.7693741473921198, + "nauc_mrr_at_3_max": -0.07329673170966303, + "nauc_mrr_at_3_std": -0.41562237775388855, + "nauc_mrr_at_5_diff1": 0.7693741473921198, + "nauc_mrr_at_5_max": -0.07329673170966303, + "nauc_mrr_at_5_std": -0.41562237775388855, + "nauc_ndcg_at_1000_diff1": 0.009920350828696455, + "nauc_ndcg_at_1000_max": -0.010774540419665588, + "nauc_ndcg_at_1000_std": 0.43694481975295574, + "nauc_ndcg_at_100_diff1": 0.018084517333648395, + "nauc_ndcg_at_100_max": -0.0147514814443795, + "nauc_ndcg_at_100_std": 0.46744646393565903, + "nauc_ndcg_at_10_diff1": -0.16407446859272273, + "nauc_ndcg_at_10_max": -0.01784761939283373, + "nauc_ndcg_at_10_std": 0.45860760438003595, + "nauc_ndcg_at_1_diff1": 0.7976521093473791, + "nauc_ndcg_at_1_max": -0.11251153162885205, + "nauc_ndcg_at_1_std": -0.43184586481189485, + "nauc_ndcg_at_20_diff1": -0.1180606939919965, + "nauc_ndcg_at_20_max": -0.017164859556287968, + "nauc_ndcg_at_20_std": 0.48562601519497933, + "nauc_ndcg_at_3_diff1": -0.21591401825959575, + "nauc_ndcg_at_3_max": -0.013782741144658254, + "nauc_ndcg_at_3_std": 0.38832357405024914, + "nauc_ndcg_at_5_diff1": -0.19548651074158818, + "nauc_ndcg_at_5_max": -0.017383534701624292, + "nauc_ndcg_at_5_std": 0.42227867362587224, + "nauc_precision_at_1000_diff1": -0.26457930829821386, + "nauc_precision_at_1000_max": -0.019625850952636456, + "nauc_precision_at_1000_std": 
0.44487310862894697, + "nauc_precision_at_100_diff1": -0.28070930263126453, + "nauc_precision_at_100_max": -0.0183983445570901, + "nauc_precision_at_100_std": 0.5216852457299462, + "nauc_precision_at_10_diff1": -0.2642289169634242, + "nauc_precision_at_10_max": -0.016037018440131376, + "nauc_precision_at_10_std": 0.43775176956816353, + "nauc_precision_at_1_diff1": 0.7976521093473791, + "nauc_precision_at_1_max": -0.11251153162885205, + "nauc_precision_at_1_std": -0.43184586481189485, + "nauc_precision_at_20_diff1": -0.26818857979333577, + "nauc_precision_at_20_max": -0.015097011478949736, + "nauc_precision_at_20_std": 0.47166450463929854, + "nauc_precision_at_3_diff1": -0.2643174309717343, + "nauc_precision_at_3_max": -0.01229104802996264, + "nauc_precision_at_3_std": 0.37315288392656526, + "nauc_precision_at_5_diff1": -0.2618738011679915, + "nauc_precision_at_5_max": -0.016386565682558307, + "nauc_precision_at_5_std": 0.4022744328150495, + "nauc_recall_at_1000_diff1": 0.02988142348682087, + "nauc_recall_at_1000_max": -0.006736994916843787, + "nauc_recall_at_1000_std": 0.38393510595562075, + "nauc_recall_at_100_diff1": 0.12754685099331003, + "nauc_recall_at_100_max": -0.005988478824620124, + "nauc_recall_at_100_std": 0.3383849668377813, + "nauc_recall_at_10_diff1": 0.27252371808491316, + "nauc_recall_at_10_max": -0.00514889254539631, + "nauc_recall_at_10_std": 0.1385644816305786, + "nauc_recall_at_1_diff1": 0.40347502895694837, + "nauc_recall_at_1_max": -0.006070639716117982, + "nauc_recall_at_1_std": 0.01609734274601474, + "nauc_recall_at_20_diff1": 0.23198661607389173, + "nauc_recall_at_20_max": -0.0036289993571175373, + "nauc_recall_at_20_std": 0.20018947547570426, + "nauc_recall_at_3_diff1": 0.3355751433312504, + "nauc_recall_at_3_max": -0.001792243329487425, + "nauc_recall_at_3_std": 0.0593931739832833, + "nauc_recall_at_5_diff1": 0.30950726685875857, + "nauc_recall_at_5_max": -0.003205556081532953, + "nauc_recall_at_5_std": 0.08819687899167265, + "ndcg_at_1": 
0.99982, + "ndcg_at_10": 0.5575, + "ndcg_at_100": 0.36797, + "ndcg_at_1000": 0.41673, + "ndcg_at_20": 0.475, + "ndcg_at_3": 0.7517, + "ndcg_at_5": 0.66112, + "precision_at_1": 0.99982, + "precision_at_10": 0.42989, + "precision_at_100": 0.15119, + "precision_at_1000": 0.03396, + "precision_at_20": 0.32737, + "precision_at_3": 0.67203, + "precision_at_5": 0.557, + "recall_at_1": 0.06083, + "recall_at_10": 0.13643, + "recall_at_100": 0.26298, + "recall_at_1000": 0.44653, + "recall_at_20": 0.16902, + "recall_at_3": 0.09249, + "recall_at_5": 0.10951 + } + ] + }, + "task_name": "RP2kI2IRetrieval" +} \ No newline at end of file From aa5855661c5bd8154cc67cda9de15fef37d057a0 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 23 Sep 2024 16:27:42 +0100 Subject: [PATCH 24/73] update image retrieval __init__.py --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 1 + .../Any2AnyRetrieval/eng/SOPI2IRetrieval.py | 2 +- .../SOPI2IRetrieval.json | 186 ------------------ 3 files changed, 2 insertions(+), 187 deletions(-) delete mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 2fb3db358a..69f31aeb1b 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -12,6 +12,7 @@ from .eng.InfoSeekIT2TRetrieval import * from .eng.MemotionI2TRetrieval import * from .eng.MemotionT2IRetrieval import * +from .eng.METI2IRetrieval import * from .eng.MSCOCOI2TRetrieval import * from .eng.MSCOCOT2IRetrieval import * from .eng.NIGHTSI2IRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py index 136c2f214c..ab2dbf73ad 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py @@ -11,7 +11,7 @@ 
class SOPI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://www.florian-schroff.de/publications/CUB-200.pdf", dataset={ "path": "JamieSJS/stanford-online-products", - "revision": "0b3a1622902e6258425e673405bdfb1e5dfa8618", + "revision": "4ac3894bdabee3c3938cf0133ab991c4b501891d", }, type="Retrieval", category="i2i", diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json deleted file mode 100644 index 6c038f69cf..0000000000 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json +++ /dev/null @@ -1,186 +0,0 @@ -{ - "dataset_revision": "0b3a1622902e6258425e673405bdfb1e5dfa8618", - "evaluation_time": 510.43445205688477, - "kg_co2_emissions": null, - "mteb_version": "1.12.90", - "scores": { - "test": [ - { - "cv_recall_at_1": 0.50296, - "cv_recall_at_10": 0.67867, - "cv_recall_at_100": 0.80207, - "cv_recall_at_1000": 0.90797, - "cv_recall_at_20": 0.71918, - "cv_recall_at_3": 0.59883, - "cv_recall_at_5": 0.63223, - "hf_subset": "default", - "languages": [ - "eng-Latn" - ], - "main_score": 0.50296, - "map_at_1": 0.01743, - "map_at_10": 0.18702, - "map_at_100": 0.19992, - "map_at_1000": 0.20163, - "map_at_20": 0.19415, - "map_at_3": 0.13926, - "map_at_5": 0.16856, - "mrr_at_1": 0.020809401784925335, - "mrr_at_10": 0.30757755224265354, - "mrr_at_100": 0.3127412453176731, - "mrr_at_1000": 0.3131742235270453, - "mrr_at_20": 0.3106655733183114, - "mrr_at_3": 0.28876174486759937, - "mrr_at_5": 0.30027318782951246, - "nauc_cv_recall_at_1000_diff1": -0.16859046840753472, - "nauc_cv_recall_at_1000_max": 0.025825747180792717, - "nauc_cv_recall_at_1000_std": 0.10093428974009021, - "nauc_cv_recall_at_100_diff1": -0.21029734576768694, - "nauc_cv_recall_at_100_max": 0.014647522560512015, - "nauc_cv_recall_at_100_std": 0.0507444053462594, - 
"nauc_cv_recall_at_10_diff1": -0.27159676443421377, - "nauc_cv_recall_at_10_max": 0.005491087955239775, - "nauc_cv_recall_at_10_std": 0.02474251556064112, - "nauc_cv_recall_at_1_diff1": -0.2932928014798219, - "nauc_cv_recall_at_1_max": -0.002633536711876493, - "nauc_cv_recall_at_1_std": -0.008456158086326292, - "nauc_cv_recall_at_20_diff1": -0.25166875961614193, - "nauc_cv_recall_at_20_max": 0.006747419978420269, - "nauc_cv_recall_at_20_std": 0.035987294094302555, - "nauc_cv_recall_at_3_diff1": -0.29779984100144863, - "nauc_cv_recall_at_3_max": -0.0035652648610872604, - "nauc_cv_recall_at_3_std": 0.014056845458600668, - "nauc_cv_recall_at_5_diff1": -0.2888857057827477, - "nauc_cv_recall_at_5_max": 7.1464099594732e-05, - "nauc_cv_recall_at_5_std": 0.01823034964853647, - "nauc_map_at_1000_diff1": -0.2714835648506667, - "nauc_map_at_1000_max": 0.0048783333927581995, - "nauc_map_at_1000_std": 0.10933153351257194, - "nauc_map_at_100_diff1": -0.27106477649697597, - "nauc_map_at_100_max": 0.004961279421023488, - "nauc_map_at_100_std": 0.10869233131948694, - "nauc_map_at_10_diff1": -0.27101224264468865, - "nauc_map_at_10_max": 0.003129881807984945, - "nauc_map_at_10_std": 0.09970024568152357, - "nauc_map_at_1_diff1": -0.5746988023835725, - "nauc_map_at_1_max": 0.009513570955681795, - "nauc_map_at_1_std": 0.24050607736711668, - "nauc_map_at_20_diff1": -0.27103409286809227, - "nauc_map_at_20_max": 0.004505986343978161, - "nauc_map_at_20_std": 0.10422239017835391, - "nauc_map_at_3_diff1": -0.25758853794935105, - "nauc_map_at_3_max": 0.0003165186809392022, - "nauc_map_at_3_std": 0.073787815755662, - "nauc_map_at_5_diff1": -0.26840871915761827, - "nauc_map_at_5_max": 0.002542520569357188, - "nauc_map_at_5_std": 0.0864360224226272, - "nauc_mrr_at_1000_diff1": -0.352973203440373, - "nauc_mrr_at_1000_max": 0.004279169460655329, - "nauc_mrr_at_1000_std": 0.05010168406176362, - "nauc_mrr_at_100_diff1": -0.35282838896266405, - "nauc_mrr_at_100_max": 0.004246244348126371, - 
"nauc_mrr_at_100_std": 0.0500103093583224, - "nauc_mrr_at_10_diff1": -0.3544263028903223, - "nauc_mrr_at_10_max": 0.003929367832392052, - "nauc_mrr_at_10_std": 0.0478252958551306, - "nauc_mrr_at_1_diff1": -0.5746988023835726, - "nauc_mrr_at_1_max": 0.029047376939523493, - "nauc_mrr_at_1_std": 0.2870259521668249, - "nauc_mrr_at_20_diff1": -0.35339280008168317, - "nauc_mrr_at_20_max": 0.004175461034327089, - "nauc_mrr_at_20_std": 0.049148248185835845, - "nauc_mrr_at_3_diff1": -0.3591723252785509, - "nauc_mrr_at_3_max": 0.00403104720219049, - "nauc_mrr_at_3_std": 0.0404960811268073, - "nauc_mrr_at_5_diff1": -0.3585645562893135, - "nauc_mrr_at_5_max": 0.0020548539639942804, - "nauc_mrr_at_5_std": 0.045522727902755446, - "nauc_ndcg_at_1000_diff1": -0.2685009406444735, - "nauc_ndcg_at_1000_max": 0.005836111353149481, - "nauc_ndcg_at_1000_std": 0.11311294743168826, - "nauc_ndcg_at_100_diff1": -0.2626615425913708, - "nauc_ndcg_at_100_max": 0.006500040666350577, - "nauc_ndcg_at_100_std": 0.10412717822836703, - "nauc_ndcg_at_10_diff1": -0.26986440689503255, - "nauc_ndcg_at_10_max": 0.002259064902287912, - "nauc_ndcg_at_10_std": 0.08351960465910635, - "nauc_ndcg_at_1_diff1": -0.5746988023835727, - "nauc_ndcg_at_1_max": 0.01860068455415359, - "nauc_ndcg_at_1_std": 0.24591999227187353, - "nauc_ndcg_at_20_diff1": -0.26557544406085987, - "nauc_ndcg_at_20_max": 0.0047811746525403785, - "nauc_ndcg_at_20_std": 0.09053231502866856, - "nauc_ndcg_at_3_diff1": -0.3079122639483788, - "nauc_ndcg_at_3_max": 0.001817933912863451, - "nauc_ndcg_at_3_std": 0.05331981166099078, - "nauc_ndcg_at_5_diff1": -0.29161633386458363, - "nauc_ndcg_at_5_max": 0.00323633286273378, - "nauc_ndcg_at_5_std": 0.07076540546903416, - "nauc_precision_at_1000_diff1": -0.13140300790068282, - "nauc_precision_at_1000_max": 0.005922440105917975, - "nauc_precision_at_1000_std": 0.08342438590882045, - "nauc_precision_at_100_diff1": -0.17052567611996527, - "nauc_precision_at_100_max": 0.011026201381103823, - 
"nauc_precision_at_100_std": 0.09223669236894032, - "nauc_precision_at_10_diff1": -0.23209493838034498, - "nauc_precision_at_10_max": 0.004908243372756972, - "nauc_precision_at_10_std": 0.07007542762563608, - "nauc_precision_at_1_diff1": -0.5746988023835727, - "nauc_precision_at_1_max": 0.01860068455415359, - "nauc_precision_at_1_std": 0.24591999227187353, - "nauc_precision_at_20_diff1": -0.2109568972104465, - "nauc_precision_at_20_max": 0.007714408837495573, - "nauc_precision_at_20_std": 0.07965290786741164, - "nauc_precision_at_3_diff1": -0.2870525654193057, - "nauc_precision_at_3_max": -0.0007584139080774421, - "nauc_precision_at_3_std": 0.0318750842480405, - "nauc_precision_at_5_diff1": -0.25948022131281406, - "nauc_precision_at_5_max": 0.004692362608818381, - "nauc_precision_at_5_std": 0.04879698010109281, - "nauc_recall_at_1000_diff1": -0.12358858307301535, - "nauc_recall_at_1000_max": 0.009306080801565777, - "nauc_recall_at_1000_std": 0.15591617596817545, - "nauc_recall_at_100_diff1": -0.14715964976867443, - "nauc_recall_at_100_max": 0.01041920443322213, - "nauc_recall_at_100_std": 0.11606682272338587, - "nauc_recall_at_10_diff1": -0.18845161575089034, - "nauc_recall_at_10_max": 0.000650022994426268, - "nauc_recall_at_10_std": 0.07614315809536469, - "nauc_recall_at_1_diff1": -0.5746988023835725, - "nauc_recall_at_1_max": 0.009513570955681795, - "nauc_recall_at_1_std": 0.24050607736711668, - "nauc_recall_at_20_diff1": -0.17338195050795077, - "nauc_recall_at_20_max": 0.006581585832690798, - "nauc_recall_at_20_std": 0.08663740977783436, - "nauc_recall_at_3_diff1": -0.19307909748063756, - "nauc_recall_at_3_max": 0.0003553588908448416, - "nauc_recall_at_3_std": 0.0442289537166574, - "nauc_recall_at_5_diff1": -0.19899176174509764, - "nauc_recall_at_5_max": -0.0005095021106718202, - "nauc_recall_at_5_std": 0.060914430592624226, - "ndcg_at_1": 0.03583, - "ndcg_at_10": 0.27789, - "ndcg_at_100": 0.32166, - "ndcg_at_1000": 0.35384, - "ndcg_at_20": 0.29399, - 
"ndcg_at_3": 0.26171, - "ndcg_at_5": 0.27031, - "precision_at_1": 0.03583, - "precision_at_10": 0.13151, - "precision_at_100": 0.01932, - "precision_at_1000": 0.0027, - "precision_at_20": 0.07531, - "precision_at_3": 0.26493, - "precision_at_5": 0.21197, - "recall_at_1": 0.01743, - "recall_at_10": 0.3639, - "recall_at_100": 0.50674, - "recall_at_1000": 0.67803, - "recall_at_20": 0.40787, - "recall_at_3": 0.24952, - "recall_at_5": 0.30796 - } - ] - }, - "task_name": "SOPI2IRetrieval" -} \ No newline at end of file From 91ad5650c473a5b7d7e96f4f1ccdb9f96787af21 Mon Sep 17 00:00:00 2001 From: gowitheflow-1998 Date: Mon, 23 Sep 2024 17:04:30 +0100 Subject: [PATCH 25/73] fix ImageTextPair dataloading for large datasets; more compositionality evaluation datasets --- {mieb-docs => docs/mieb-docs}/README.md | 0 .../AbsTaskImageTextPairClassification.py | 20 +-- .../ImageTextPairClassificationEvaluator.py | 156 +++++++++++++----- .../AROCocoOrder.py | 57 +++++++ .../AROFlickrOrder.py | 57 +++++++ .../ImageTextPairClassification/__init__.py | 2 + .../AROCocoOrder.json | 21 +++ .../AROFlickrOrder.json | 21 +++ .../AROCocoOrder.json | 21 +++ .../AROFlickrOrder.json | 21 +++ .../AROCocoOrder.json | 21 +++ .../AROFlickrOrder.json | 21 +++ 12 files changed, 357 insertions(+), 61 deletions(-) rename {mieb-docs => docs/mieb-docs}/README.md (100%) create mode 100644 mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py create mode 100644 mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py create mode 100644 results-mieb/openai__clip-vit-base-patch16/57c216476eefef5ab752ec549e440a49ae4ae5f3/AROCocoOrder.json create mode 100644 results-mieb/openai__clip-vit-base-patch16/57c216476eefef5ab752ec549e440a49ae4ae5f3/AROFlickrOrder.json create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/AROCocoOrder.json create mode 100644 
results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/AROFlickrOrder.json create mode 100644 results-mieb/openai__clip-vit-large-patch14/32bd64288804d66eefd0ccbe215aa642df71cc41/AROCocoOrder.json create mode 100644 results-mieb/openai__clip-vit-large-patch14/32bd64288804d66eefd0ccbe215aa642df71cc41/AROFlickrOrder.json diff --git a/mieb-docs/README.md b/docs/mieb-docs/README.md similarity index 100% rename from mieb-docs/README.md rename to docs/mieb-docs/README.md diff --git a/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py b/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py index c6d4a6a2de..49523c58f9 100644 --- a/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py +++ b/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py @@ -4,7 +4,6 @@ from typing import Any from datasets import Dataset -from tqdm import tqdm from ...encoder_interface import Encoder, EncoderWithQueryCorpusEncode from ...evaluation.evaluators import ImageTextPairClassificationEvaluator @@ -32,18 +31,6 @@ class AbsTaskImageTextPairClassification(AbsTask): def __init__(self, **kwargs): super().__init__(**kwargs) - def _preprocess_column( - self, dataset: Dataset, column_names: str | list[str] - ) -> list[list[Any]]: - """Group examples from the columns into a list of examples.""" - if isinstance(column_names, str): - return dataset[column_names] - - return [ - [example[col] for col in column_names] - for example in tqdm(dataset, desc=f"Processing columns {column_names}") - ] - def _add_main_score(self, scores) -> None: scores["main_score"] = scores[self.metadata.main_score] @@ -60,11 +47,10 @@ def _evaluate_subset( encode_kwargs: dict[str, Any] = {}, **kwargs, ) -> ScoresDict: - images = self._preprocess_column(dataset, self.images_column_names) - texts = self._preprocess_column(dataset, self.texts_column_names) evaluator = ImageTextPairClassificationEvaluator( - images, - texts, + dataset, + images_column_names=self.images_column_names, 
+ texts_column_names=self.texts_column_names, task_name=self.metadata.name, **kwargs, ) diff --git a/mteb/evaluation/evaluators/Image/ImageTextPairClassificationEvaluator.py b/mteb/evaluation/evaluators/Image/ImageTextPairClassificationEvaluator.py index 403b3758f2..b548da365e 100644 --- a/mteb/evaluation/evaluators/Image/ImageTextPairClassificationEvaluator.py +++ b/mteb/evaluation/evaluators/Image/ImageTextPairClassificationEvaluator.py @@ -1,18 +1,58 @@ from __future__ import annotations -import itertools import logging from typing import Any import torch import torch.nn.functional as F -from PIL import Image +from torch.utils.data import DataLoader +from torchvision import transforms from mteb.encoder_interface import Encoder, EncoderWithSimilarity from mteb.evaluation.evaluators.Evaluator import Evaluator logger = logging.getLogger(__name__) +transform = transforms.Compose([transforms.PILToTensor()]) + + +class ImageTextDataset(torch.utils.data.Dataset): + def __init__( + self, hf_dataset, images_column_names, texts_column_names, transform=None + ): + self.dataset = hf_dataset + self.transform = transform + self.images_column_names = images_column_names + self.texts_column_names = texts_column_names + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, idx): + data = self.dataset[idx] + + # Get images + if isinstance(self.images_column_names, str): + images = data[self.images_column_names] + else: + images = [data[col] for col in self.images_column_names] + + # Apply transforms to images + if self.transform is not None: + images = [self.transform(img) for img in images] + + # Get texts + if isinstance(self.texts_column_names, str): + texts = data[self.texts_column_names] + else: + texts = [data[col] for col in self.texts_column_names] + + return images, texts + + +def custom_collate_fn(batch): + return batch + class ImageTextPairClassificationEvaluator(Evaluator): """Evaluate a model based on the similarity of the embeddings by 
calculating the accuracy of @@ -30,21 +70,22 @@ class ImageTextPairClassificationEvaluator(Evaluator): def __init__( self, - images: list[list[Image.Image]], - texts: list[list[str]], + dataset, + images_column_names: str | list[str], + texts_column_names: str | list[str], task_name: str | None = None, + transform=None, limit: int | None = None, **kwargs, ): super().__init__(**kwargs) if limit: - images = images[:limit] - texts = texts[:limit] - self.images = images - self.texts = texts + dataset = dataset.select(range(limit)) + self.dataset = dataset + self.images_column_names = images_column_names + self.texts_column_names = texts_column_names self.task_name = task_name - - assert len(self.images) == len(self.texts) + self.transform = transform def __call__( self, @@ -54,21 +95,31 @@ def __call__( if "batch_size" not in encode_kwargs: encode_kwargs["batch_size"] = 64 - num_samples = len(self.images) - num_images_per_sample = len(self.images[0]) - num_texts_per_sample = len(self.texts[0]) - - images = list(itertools.chain.from_iterable(self.images)) - texts = list(itertools.chain.from_iterable(self.texts)) - - image_embeddings = F.normalize( - model.get_image_embeddings(images, batch_size=encode_kwargs["batch_size"]), - dim=-1, - ).view(num_samples, num_images_per_sample, -1) - text_embeddings = F.normalize( - model.get_text_embeddings(texts, batch_size=encode_kwargs["batch_size"]), - dim=-1, - ).view(num_samples, num_texts_per_sample, -1) + data_loader = DataLoader( + ImageTextDataset( + self.dataset, + self.images_column_names, + self.texts_column_names, + transform=self.transform, + ), + batch_size=encode_kwargs["batch_size"], + shuffle=False, + # collate_fn=lambda x: x, # Identity collate function + collate_fn=custom_collate_fn, + num_workers=4, + ) + + num_images_per_sample = ( + len(self.images_column_names) + if isinstance(self.images_column_names, list) + else 1 + ) + num_texts_per_sample = ( + len(self.texts_column_names) + if 
isinstance(self.texts_column_names, list) + else 1 + ) + img_ground_truths = torch.arange(num_images_per_sample) caption_ground_truths = torch.arange(num_texts_per_sample) @@ -76,25 +127,42 @@ def __call__( text_score = [] score = [] - for i in range(num_samples): - images_emb = image_embeddings[i] - texts_emb = text_embeddings[i] - scores = ( - images_emb @ texts_emb.t() - ) # shape = (num_images_per_sample x num_texts_per_sample) - - image_closest_text = scores.argmax(dim=1) # shape = (num_images_per_sample) - text_closest_image = scores.argmax(dim=0) # shape = (num_texts_per_sample) - pred_text_is_correct = ( - (image_closest_text == img_ground_truths).all().item() - ) - pred_image_is_correct = ( - (text_closest_image == caption_ground_truths).all().item() - ) - all_correct = pred_text_is_correct and pred_image_is_correct - image_score.append(pred_image_is_correct) - text_score.append(pred_text_is_correct) - score.append(all_correct) + for batch in data_loader: + images_list, texts_list = zip(*batch) + images = [img for images in images_list for img in images] + texts = [txt for texts in texts_list for txt in texts] + images_emb = F.normalize( + model.get_image_embeddings(images, batch_size=len(images)), + dim=-1, + ).view(len(batch), num_images_per_sample, -1) + texts_emb = F.normalize( + model.get_text_embeddings(texts, batch_size=len(texts)), + dim=-1, + ).view(len(batch), num_texts_per_sample, -1) + for i in range(len(batch)): + img_emb = images_emb[i] + txt_emb = texts_emb[i] + + scores = ( + img_emb @ txt_emb.t() + ) # shape = (num_images_per_sample x num_texts_per_sample) + + image_closest_text = scores.argmax( + dim=1 + ) # shape = (num_images_per_sample) + text_closest_image = scores.argmax( + dim=0 + ) # shape = (num_texts_per_sample) + pred_text_is_correct = ( + (image_closest_text == img_ground_truths).all().item() + ) + pred_image_is_correct = ( + (text_closest_image == caption_ground_truths).all().item() + ) + all_correct = pred_text_is_correct and 
pred_image_is_correct + image_score.append(pred_image_is_correct) + text_score.append(pred_text_is_correct) + score.append(all_correct) metrics = {} metrics["image_acc"] = torch.Tensor(image_score).float().mean().item() diff --git a/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py b/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py new file mode 100644 index 0000000000..bc9c50a3ba --- /dev/null +++ b/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskImageTextPairClassification import ( + AbsTaskImageTextPairClassification, +) +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class AROCocoOrder(AbsTaskImageTextPairClassification): + images_column_names = ["images"] + texts_column_names = [ + "correct_caption", + "hard_text_1", + "hard_text_2", + "hard_text_3", + "hard_text_4", + ] + + metadata = TaskMetadata( + name="AROCocoOrder", + description="Compositionality Evaluation of images to their captions." 
+ + "Each capation has four hard negatives created by order permutations.", + reference="https://proceedings.neurips.cc/paper_files/paper/2023/hash/63461de0b4cb760fc498e85b18a7fe81-Abstract-Datasets_and_Benchmarks.html", + dataset={ + "path": "gowitheflow/ARO-COCO-order", + "revision": "853ec8757226585a38a80886c51fe0f3f268787c", + }, + type="ImageTextPairClassification", + category="i2t", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="text_acc", + date=( + "2022-01-01", + "2022-12-31", + ), # Estimated range for the collection of data + form=["written"], + domains=["Encyclopaedic"], + task_subtypes=["Caption Pairing"], + license="Not specified", + socioeconomic_status="mixed", + annotations_creators="expert-annotated", + dialect=[], + modalities=["text", "image"], + sample_creation="created", + bibtex_citation="""@article{hsieh2024sugarcrepe, + title={Sugarcrepe: Fixing hackable benchmarks for vision-language compositionality}, + author={Hsieh, Cheng-Yu and Zhang, Jieyu and Ma, Zixian and Kembhavi, Aniruddha and Krishna, Ranjay}, + journal={Advances in neural information processing systems}, + volume={36}, + year={2024} +}""", + descriptive_stats={ + "n_samples": {"test": 25010}, + "avg_character_length": {"test": 1}, + }, + ) diff --git a/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py b/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py new file mode 100644 index 0000000000..b423861f23 --- /dev/null +++ b/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskImageTextPairClassification import ( + AbsTaskImageTextPairClassification, +) +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class AROFlickrOrder(AbsTaskImageTextPairClassification): + images_column_names = ["images"] + texts_column_names = [ + "correct_caption", + "hard_text_1", + "hard_text_2", + "hard_text_3", + "hard_text_4", + ] + + metadata = 
TaskMetadata( + name="AROFlickrOrder", + description="Compositionality Evaluation of images to their captions." + + "Each capation has four hard negatives created by order permutations.", + reference="https://proceedings.neurips.cc/paper_files/paper/2023/hash/63461de0b4cb760fc498e85b18a7fe81-Abstract-Datasets_and_Benchmarks.html", + dataset={ + "path": "gowitheflow/ARO-Flickr-Order", + "revision": "1f9485f69c87947812378a1aedf86410c86a0aa8", + }, + type="ImageTextPairClassification", + category="i2t", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="text_acc", + date=( + "2022-01-01", + "2022-12-31", + ), # Estimated range for the collection of data + form=["written"], + domains=["Encyclopaedic"], + task_subtypes=["Caption Pairing"], + license="Not specified", + socioeconomic_status="mixed", + annotations_creators="expert-annotated", + dialect=[], + modalities=["text", "image"], + sample_creation="created", + bibtex_citation="""@article{hsieh2024sugarcrepe, + title={Sugarcrepe: Fixing hackable benchmarks for vision-language compositionality}, + author={Hsieh, Cheng-Yu and Zhang, Jieyu and Ma, Zixian and Kembhavi, Aniruddha and Krishna, Ranjay}, + journal={Advances in neural information processing systems}, + volume={36}, + year={2024} +}""", + descriptive_stats={ + "n_samples": {"test": 5000}, + "avg_character_length": {"test": 1}, + }, + ) diff --git a/mteb/tasks/Image/ImageTextPairClassification/__init__.py b/mteb/tasks/Image/ImageTextPairClassification/__init__.py index d35de07c28..69f0a9fbc1 100644 --- a/mteb/tasks/Image/ImageTextPairClassification/__init__.py +++ b/mteb/tasks/Image/ImageTextPairClassification/__init__.py @@ -1,5 +1,7 @@ from __future__ import annotations +from .AROCocoOrder import * +from .AROFlickrOrder import * from .AROVisualAttribution import * from .AROVisualRelation import * from .SugarCrepe import * diff --git a/results-mieb/openai__clip-vit-base-patch16/57c216476eefef5ab752ec549e440a49ae4ae5f3/AROCocoOrder.json 
b/results-mieb/openai__clip-vit-base-patch16/57c216476eefef5ab752ec549e440a49ae4ae5f3/AROCocoOrder.json new file mode 100644 index 0000000000..8924145dd2 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch16/57c216476eefef5ab752ec549e440a49ae4ae5f3/AROCocoOrder.json @@ -0,0 +1,21 @@ +{ + "dataset_revision": "853ec8757226585a38a80886c51fe0f3f268787c", + "evaluation_time": 207.2516052722931, + "kg_co2_emissions": null, + "mteb_version": "1.14.15", + "scores": { + "test": [ + { + "accuracy": 0.0, + "hf_subset": "default", + "image_acc": 0.0, + "languages": [ + "eng-Latn" + ], + "main_score": 0.4812075197696686, + "text_acc": 0.4812075197696686 + } + ] + }, + "task_name": "AROCocoOrder" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch16/57c216476eefef5ab752ec549e440a49ae4ae5f3/AROFlickrOrder.json b/results-mieb/openai__clip-vit-base-patch16/57c216476eefef5ab752ec549e440a49ae4ae5f3/AROFlickrOrder.json new file mode 100644 index 0000000000..b69a24d891 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch16/57c216476eefef5ab752ec549e440a49ae4ae5f3/AROFlickrOrder.json @@ -0,0 +1,21 @@ +{ + "dataset_revision": "1f9485f69c87947812378a1aedf86410c86a0aa8", + "evaluation_time": 37.06100630760193, + "kg_co2_emissions": null, + "mteb_version": "1.14.15", + "scores": { + "test": [ + { + "accuracy": 0.0, + "hf_subset": "default", + "image_acc": 0.0, + "languages": [ + "eng-Latn" + ], + "main_score": 0.5583999752998352, + "text_acc": 0.5583999752998352 + } + ] + }, + "task_name": "AROFlickrOrder" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/AROCocoOrder.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/AROCocoOrder.json new file mode 100644 index 0000000000..ed54c38473 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/AROCocoOrder.json @@ -0,0 +1,21 @@ +{ + 
"dataset_revision": "853ec8757226585a38a80886c51fe0f3f268787c", + "evaluation_time": 158.77182126045227, + "kg_co2_emissions": null, + "mteb_version": "1.14.15", + "scores": { + "test": [ + { + "accuracy": 0.0, + "hf_subset": "default", + "image_acc": 0.0, + "languages": [ + "eng-Latn" + ], + "main_score": 0.46009597182273865, + "text_acc": 0.46009597182273865 + } + ] + }, + "task_name": "AROCocoOrder" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/AROFlickrOrder.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/AROFlickrOrder.json new file mode 100644 index 0000000000..6cdefe46fb --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/AROFlickrOrder.json @@ -0,0 +1,21 @@ +{ + "dataset_revision": "1f9485f69c87947812378a1aedf86410c86a0aa8", + "evaluation_time": 29.562106609344482, + "kg_co2_emissions": null, + "mteb_version": "1.14.15", + "scores": { + "test": [ + { + "accuracy": 0.0, + "hf_subset": "default", + "image_acc": 0.0, + "languages": [ + "eng-Latn" + ], + "main_score": 0.5612000226974487, + "text_acc": 0.5612000226974487 + } + ] + }, + "task_name": "AROFlickrOrder" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-large-patch14/32bd64288804d66eefd0ccbe215aa642df71cc41/AROCocoOrder.json b/results-mieb/openai__clip-vit-large-patch14/32bd64288804d66eefd0ccbe215aa642df71cc41/AROCocoOrder.json new file mode 100644 index 0000000000..5d7cf611c6 --- /dev/null +++ b/results-mieb/openai__clip-vit-large-patch14/32bd64288804d66eefd0ccbe215aa642df71cc41/AROCocoOrder.json @@ -0,0 +1,21 @@ +{ + "dataset_revision": "853ec8757226585a38a80886c51fe0f3f268787c", + "evaluation_time": 432.3775689601898, + "kg_co2_emissions": null, + "mteb_version": "1.14.15", + "scores": { + "test": [ + { + "accuracy": 0.0, + "hf_subset": "default", + "image_acc": 0.0, + "languages": [ + "eng-Latn" + ], 
+ "main_score": 0.4538184702396393, + "text_acc": 0.4538184702396393 + } + ] + }, + "task_name": "AROCocoOrder" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-large-patch14/32bd64288804d66eefd0ccbe215aa642df71cc41/AROFlickrOrder.json b/results-mieb/openai__clip-vit-large-patch14/32bd64288804d66eefd0ccbe215aa642df71cc41/AROFlickrOrder.json new file mode 100644 index 0000000000..b67b51a5cd --- /dev/null +++ b/results-mieb/openai__clip-vit-large-patch14/32bd64288804d66eefd0ccbe215aa642df71cc41/AROFlickrOrder.json @@ -0,0 +1,21 @@ +{ + "dataset_revision": "1f9485f69c87947812378a1aedf86410c86a0aa8", + "evaluation_time": 84.40965294837952, + "kg_co2_emissions": null, + "mteb_version": "1.14.15", + "scores": { + "test": [ + { + "accuracy": 0.0, + "hf_subset": "default", + "image_acc": 0.0, + "languages": [ + "eng-Latn" + ], + "main_score": 0.5440000295639038, + "text_acc": 0.5440000295639038 + } + ] + }, + "task_name": "AROFlickrOrder" +} \ No newline at end of file From fc4d1ee2a6cba3bdd6eb04cfe563458c4cbd892f Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Tue, 24 Sep 2024 13:25:33 +0100 Subject: [PATCH 26/73] add RP2kI2IRetrieval and METI2IRetrieval --- mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py index ab2dbf73ad..378553e27b 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py @@ -18,7 +18,7 @@ class SOPI2IRetrieval(AbsTaskAny2AnyRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="cv_recall_at_1", - date=("2009-01-01", "2010-04-01"), + date=("2019-07-17", "2019-07-17"), domains=["Encyclopaedic"], task_subtypes=["Object recognition"], license="Not specified", From 7d4f3326f0726c8a20fb1f9caf1b487038260136 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling 
Date: Tue, 24 Sep 2024 13:27:26 +0100 Subject: [PATCH 27/73] add METI2IRetreival --- .../Any2AnyRetrieval/eng/METI2IRetrieval.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py new file mode 100644 index 0000000000..ad6157c58c --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class METI2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="METI2IRetrieval", + description="Retrieve photos of more than 224k artworks.", + reference="https://arxiv.org/abs/2202.01747", + dataset={ + "path": "JamieSJS/met", + "revision": "994fe3c451b8d74e0b255b17f87305e8ac95d9cb", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="cv_recall_at_1", + date=("2021-12-06", "2021-12-14"), # conference dates + domains=["Encyclopaedic"], + task_subtypes=["Object recognition"], + license="Not specified", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@inproceedings{ypsilantis2021met, + title={The met dataset: Instance-level recognition for artworks}, + author={Ypsilantis, Nikolaos-Antonios and Garcia, Noa and Han, Guangxing and Ibrahimi, Sarah and Van Noord, Nanne and Tolias, Giorgos}, + booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, + year={2021} +} + """, + descriptive_stats={ + #"n_samples": {"default": 397121}, + }, + ) + skip_first_result = True From 57593ebb496388d34c20c9006a67ed50121ffeac Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Tue, 
24 Sep 2024 13:27:43 +0100 Subject: [PATCH 28/73] add SOP results --- .../SOPI2IRetrieval.json | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json new file mode 100644 index 0000000000..77f92463bd --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "4ac3894bdabee3c3938cf0133ab991c4b501891d", + "evaluation_time": 1411.4596996307373, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.4856, + "cv_recall_at_10": 0.65892, + "cv_recall_at_100": 0.80265, + "cv_recall_at_1000": 0.91906, + "cv_recall_at_20": 0.70484, + "cv_recall_at_3": 0.57481, + "cv_recall_at_5": 0.61136, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.4856, + "map_at_1": 0.18847, + "map_at_10": 0.37878, + "map_at_100": 0.39701, + "map_at_1000": 0.39955, + "map_at_20": 0.38884, + "map_at_3": 0.31574, + "map_at_5": 0.35297, + "mrr_at_1": 0.9996334952062839, + "mrr_at_10": 0.9998132769138076, + "mrr_at_100": 0.9998132769138076, + "mrr_at_1000": 0.9998132769138076, + "mrr_at_20": 0.9998132769138076, + "mrr_at_3": 0.999811194500207, + "mrr_at_5": 0.9998132769138076, + "nauc_cv_recall_at_1000_diff1": -0.4588738282028226, + "nauc_cv_recall_at_1000_max": -0.00045154026330132926, + "nauc_cv_recall_at_1000_std": 0.13389839300845494, + "nauc_cv_recall_at_100_diff1": -0.40349410035175093, + "nauc_cv_recall_at_100_max": -0.0015857946226738907, + "nauc_cv_recall_at_100_std": 0.1564430560934902, + "nauc_cv_recall_at_10_diff1": -0.4005181992520635, + 
"nauc_cv_recall_at_10_max": -0.003957796532778069, + "nauc_cv_recall_at_10_std": 0.14566052153014458, + "nauc_cv_recall_at_1_diff1": -0.40652510568056555, + "nauc_cv_recall_at_1_max": -0.00544324116729814, + "nauc_cv_recall_at_1_std": 0.11520889475416408, + "nauc_cv_recall_at_20_diff1": -0.3973805526446551, + "nauc_cv_recall_at_20_max": -0.0046404467731546205, + "nauc_cv_recall_at_20_std": 0.14736528921128944, + "nauc_cv_recall_at_3_diff1": -0.4068997028240004, + "nauc_cv_recall_at_3_max": -0.00581379693442588, + "nauc_cv_recall_at_3_std": 0.13232980933178856, + "nauc_cv_recall_at_5_diff1": -0.4027824500878659, + "nauc_cv_recall_at_5_max": -0.00622124305658329, + "nauc_cv_recall_at_5_std": 0.13795375991899358, + "nauc_map_at_1000_diff1": -0.2815134589062327, + "nauc_map_at_1000_max": -0.009309150035044157, + "nauc_map_at_1000_std": 0.21821074035964452, + "nauc_map_at_100_diff1": -0.2802916020865774, + "nauc_map_at_100_max": -0.00929273233548749, + "nauc_map_at_100_std": 0.21738215325378304, + "nauc_map_at_10_diff1": -0.2714998577314571, + "nauc_map_at_10_max": -0.00972009151554314, + "nauc_map_at_10_std": 0.20451609552118097, + "nauc_map_at_1_diff1": 0.006605172707482505, + "nauc_map_at_1_max": -0.00535293182577402, + "nauc_map_at_1_std": 0.08912323354604411, + "nauc_map_at_20_diff1": -0.276631619640432, + "nauc_map_at_20_max": -0.009275901369492237, + "nauc_map_at_20_std": 0.21273445874298258, + "nauc_map_at_3_diff1": -0.2144365987666858, + "nauc_map_at_3_max": -0.008565184161123921, + "nauc_map_at_3_std": 0.1490404208758112, + "nauc_map_at_5_diff1": -0.2525988859250142, + "nauc_map_at_5_max": -0.00899732355635905, + "nauc_map_at_5_std": 0.18031989165161078, + "nauc_mrr_at_1000_diff1": 1.0, + "nauc_mrr_at_1000_max": 0.17398421274750922, + "nauc_mrr_at_1000_std": -0.3793244588442443, + "nauc_mrr_at_100_diff1": 1.0, + "nauc_mrr_at_100_max": 0.17398421274750922, + "nauc_mrr_at_100_std": -0.3793244588442443, + "nauc_mrr_at_10_diff1": 1.0, + "nauc_mrr_at_10_max": 
0.17398421274750922, + "nauc_mrr_at_10_std": -0.3793244588442443, + "nauc_mrr_at_1_diff1": 1.0, + "nauc_mrr_at_1_max": 0.17317138790943362, + "nauc_mrr_at_1_std": -0.36469583799829147, + "nauc_mrr_at_20_diff1": 1.0, + "nauc_mrr_at_20_max": 0.17398421274750922, + "nauc_mrr_at_20_std": -0.3793244588442443, + "nauc_mrr_at_3_diff1": 1.0, + "nauc_mrr_at_3_max": 0.17341951004597636, + "nauc_mrr_at_3_std": -0.3878434792194687, + "nauc_mrr_at_5_diff1": 1.0, + "nauc_mrr_at_5_max": 0.17398421274750922, + "nauc_mrr_at_5_std": -0.3793244588442443, + "nauc_ndcg_at_1000_diff1": -0.3092675026331808, + "nauc_ndcg_at_1000_max": -0.009597931782047127, + "nauc_ndcg_at_1000_std": 0.23456527611896652, + "nauc_ndcg_at_100_diff1": -0.29597233628106967, + "nauc_ndcg_at_100_max": -0.009309685183289425, + "nauc_ndcg_at_100_std": 0.22752615017083713, + "nauc_ndcg_at_10_diff1": -0.2930380020320867, + "nauc_ndcg_at_10_max": -0.01000547366692856, + "nauc_ndcg_at_10_std": 0.20753291775628463, + "nauc_ndcg_at_1_diff1": 1.0, + "nauc_ndcg_at_1_max": 0.17610023993447146, + "nauc_ndcg_at_1_std": -0.36469583799829147, + "nauc_ndcg_at_20_diff1": -0.28823218777084936, + "nauc_ndcg_at_20_max": -0.00923331182370813, + "nauc_ndcg_at_20_std": 0.215531645591675, + "nauc_ndcg_at_3_diff1": -0.3656155045971337, + "nauc_ndcg_at_3_max": -0.007681435647815146, + "nauc_ndcg_at_3_std": 0.14037876964394946, + "nauc_ndcg_at_5_diff1": -0.3365107912821576, + "nauc_ndcg_at_5_max": -0.008614755602549906, + "nauc_ndcg_at_5_std": 0.18007814023447868, + "nauc_precision_at_1000_diff1": -0.19806317219535188, + "nauc_precision_at_1000_max": -0.0015221284698821193, + "nauc_precision_at_1000_std": 0.10530172591311254, + "nauc_precision_at_100_diff1": -0.2572749977303199, + "nauc_precision_at_100_max": -0.003787347697600317, + "nauc_precision_at_100_std": 0.1656157857683021, + "nauc_precision_at_10_diff1": -0.3101437993491914, + "nauc_precision_at_10_max": -0.007420244456215511, + "nauc_precision_at_10_std": 0.17668167529658718, + 
"nauc_precision_at_1_diff1": 1.0, + "nauc_precision_at_1_max": 0.17610023993447146, + "nauc_precision_at_1_std": -0.36469583799829147, + "nauc_precision_at_20_diff1": -0.29472889748829945, + "nauc_precision_at_20_max": -0.005520841437776455, + "nauc_precision_at_20_std": 0.18138281023915606, + "nauc_precision_at_3_diff1": -0.35841782500948777, + "nauc_precision_at_3_max": -0.00748476584740033, + "nauc_precision_at_3_std": 0.12872707729510002, + "nauc_precision_at_5_diff1": -0.3295916303527328, + "nauc_precision_at_5_max": -0.007145664789792643, + "nauc_precision_at_5_std": 0.15297931876842116, + "nauc_recall_at_1000_diff1": -0.25792280904882936, + "nauc_recall_at_1000_max": -0.00797884188493623, + "nauc_recall_at_1000_std": 0.24244025939864458, + "nauc_recall_at_100_diff1": -0.2524975921657061, + "nauc_recall_at_100_max": -0.007586553109911903, + "nauc_recall_at_100_std": 0.23439228371650886, + "nauc_recall_at_10_diff1": -0.2562100752922837, + "nauc_recall_at_10_max": -0.009867846823865567, + "nauc_recall_at_10_std": 0.20691424138363798, + "nauc_recall_at_1_diff1": 0.006605172707482505, + "nauc_recall_at_1_max": -0.00535293182577402, + "nauc_recall_at_1_std": 0.08912323354604411, + "nauc_recall_at_20_diff1": -0.2560836034546459, + "nauc_recall_at_20_max": -0.007754724440480409, + "nauc_recall_at_20_std": 0.21910103605653863, + "nauc_recall_at_3_diff1": -0.21039105024165025, + "nauc_recall_at_3_max": -0.008374958963214659, + "nauc_recall_at_3_std": 0.1489992487923535, + "nauc_recall_at_5_diff1": -0.242995017130598, + "nauc_recall_at_5_max": -0.00909414443674345, + "nauc_recall_at_5_std": 0.18059416206675072, + "ndcg_at_1": 0.99963, + "ndcg_at_10": 0.52441, + "ndcg_at_100": 0.5623, + "ndcg_at_1000": 0.59803, + "ndcg_at_20": 0.53321, + "ndcg_at_3": 0.69421, + "ndcg_at_5": 0.59023, + "precision_at_1": 0.99963, + "precision_at_10": 0.24637, + "precision_at_100": 0.03343, + "precision_at_1000": 0.00451, + "precision_at_20": 0.13636, + "precision_at_3": 0.59241, + 
"precision_at_5": 0.42271, + "recall_at_1": 0.18847, + "recall_at_10": 0.41136, + "recall_at_100": 0.52984, + "recall_at_1000": 0.6822, + "recall_at_20": 0.44681, + "recall_at_3": 0.31926, + "recall_at_5": 0.3657 + } + ] + }, + "task_name": "SOPI2IRetrieval" +} \ No newline at end of file From 62e6e9fa2a6945f5dd5369e69e054fe3713660cc Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Tue, 24 Sep 2024 13:27:59 +0100 Subject: [PATCH 29/73] make lign --- mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py index ad6157c58c..d4017175ce 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py @@ -18,7 +18,7 @@ class METI2IRetrieval(AbsTaskAny2AnyRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="cv_recall_at_1", - date=("2021-12-06", "2021-12-14"), # conference dates + date=("2021-12-06", "2021-12-14"), # conference dates domains=["Encyclopaedic"], task_subtypes=["Object recognition"], license="Not specified", @@ -34,7 +34,7 @@ class METI2IRetrieval(AbsTaskAny2AnyRetrieval): } """, descriptive_stats={ - #"n_samples": {"default": 397121}, + # "n_samples": {"default": 397121}, }, ) skip_first_result = True From 094a6548c28e14902b4faffcfaa650e53f2ef219 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Wed, 25 Sep 2024 09:56:27 +0100 Subject: [PATCH 30/73] new revision for METI2IRetrieval --- .../Any2AnyRetrieval/eng/METI2IRetrieval.py | 2 +- .../SOPI2IRetrieval.json | 180 ------------------ 2 files changed, 1 insertion(+), 181 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py index d4017175ce..14fa576651 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py +++ 
b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py @@ -11,7 +11,7 @@ class METI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://arxiv.org/abs/2202.01747", dataset={ "path": "JamieSJS/met", - "revision": "994fe3c451b8d74e0b255b17f87305e8ac95d9cb", + "revision": "4e6db807efadebc1591332c3f5734f545b3dd258", }, type="Retrieval", category="i2i", diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json index 87e39bd2fb..77f92463bd 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/SOPI2IRetrieval.json @@ -1,17 +1,11 @@ { -<<<<<<< HEAD "dataset_revision": "4ac3894bdabee3c3938cf0133ab991c4b501891d", "evaluation_time": 1411.4596996307373, -======= - "dataset_revision": "0b3a1622902e6258425e673405bdfb1e5dfa8618", - "evaluation_time": 510.43445205688477, ->>>>>>> d5bfecea0b9a038c9d004b1f061ee681f25aede6 "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { -<<<<<<< HEAD "cv_recall_at_1": 0.4856, "cv_recall_at_10": 0.65892, "cv_recall_at_100": 0.80265, @@ -19,20 +13,10 @@ "cv_recall_at_20": 0.70484, "cv_recall_at_3": 0.57481, "cv_recall_at_5": 0.61136, -======= - "cv_recall_at_1": 0.50296, - "cv_recall_at_10": 0.67867, - "cv_recall_at_100": 0.80207, - "cv_recall_at_1000": 0.90797, - "cv_recall_at_20": 0.71918, - "cv_recall_at_3": 0.59883, - "cv_recall_at_5": 0.63223, ->>>>>>> d5bfecea0b9a038c9d004b1f061ee681f25aede6 "hf_subset": "default", "languages": [ "eng-Latn" ], -<<<<<<< HEAD "main_score": 0.4856, "map_at_1": 0.18847, "map_at_10": 0.37878, @@ -195,170 +179,6 @@ "recall_at_20": 0.44681, "recall_at_3": 0.31926, "recall_at_5": 0.3657 -======= - "main_score": 0.50296, - "map_at_1": 0.01743, - "map_at_10": 0.18702, - 
"map_at_100": 0.19992, - "map_at_1000": 0.20163, - "map_at_20": 0.19415, - "map_at_3": 0.13926, - "map_at_5": 0.16856, - "mrr_at_1": 0.020809401784925335, - "mrr_at_10": 0.30757755224265354, - "mrr_at_100": 0.3127412453176731, - "mrr_at_1000": 0.3131742235270453, - "mrr_at_20": 0.3106655733183114, - "mrr_at_3": 0.28876174486759937, - "mrr_at_5": 0.30027318782951246, - "nauc_cv_recall_at_1000_diff1": -0.16859046840753472, - "nauc_cv_recall_at_1000_max": 0.025825747180792717, - "nauc_cv_recall_at_1000_std": 0.10093428974009021, - "nauc_cv_recall_at_100_diff1": -0.21029734576768694, - "nauc_cv_recall_at_100_max": 0.014647522560512015, - "nauc_cv_recall_at_100_std": 0.0507444053462594, - "nauc_cv_recall_at_10_diff1": -0.27159676443421377, - "nauc_cv_recall_at_10_max": 0.005491087955239775, - "nauc_cv_recall_at_10_std": 0.02474251556064112, - "nauc_cv_recall_at_1_diff1": -0.2932928014798219, - "nauc_cv_recall_at_1_max": -0.002633536711876493, - "nauc_cv_recall_at_1_std": -0.008456158086326292, - "nauc_cv_recall_at_20_diff1": -0.25166875961614193, - "nauc_cv_recall_at_20_max": 0.006747419978420269, - "nauc_cv_recall_at_20_std": 0.035987294094302555, - "nauc_cv_recall_at_3_diff1": -0.29779984100144863, - "nauc_cv_recall_at_3_max": -0.0035652648610872604, - "nauc_cv_recall_at_3_std": 0.014056845458600668, - "nauc_cv_recall_at_5_diff1": -0.2888857057827477, - "nauc_cv_recall_at_5_max": 7.1464099594732e-05, - "nauc_cv_recall_at_5_std": 0.01823034964853647, - "nauc_map_at_1000_diff1": -0.2714835648506667, - "nauc_map_at_1000_max": 0.0048783333927581995, - "nauc_map_at_1000_std": 0.10933153351257194, - "nauc_map_at_100_diff1": -0.27106477649697597, - "nauc_map_at_100_max": 0.004961279421023488, - "nauc_map_at_100_std": 0.10869233131948694, - "nauc_map_at_10_diff1": -0.27101224264468865, - "nauc_map_at_10_max": 0.003129881807984945, - "nauc_map_at_10_std": 0.09970024568152357, - "nauc_map_at_1_diff1": -0.5746988023835725, - "nauc_map_at_1_max": 0.009513570955681795, - 
"nauc_map_at_1_std": 0.24050607736711668, - "nauc_map_at_20_diff1": -0.27103409286809227, - "nauc_map_at_20_max": 0.004505986343978161, - "nauc_map_at_20_std": 0.10422239017835391, - "nauc_map_at_3_diff1": -0.25758853794935105, - "nauc_map_at_3_max": 0.0003165186809392022, - "nauc_map_at_3_std": 0.073787815755662, - "nauc_map_at_5_diff1": -0.26840871915761827, - "nauc_map_at_5_max": 0.002542520569357188, - "nauc_map_at_5_std": 0.0864360224226272, - "nauc_mrr_at_1000_diff1": -0.352973203440373, - "nauc_mrr_at_1000_max": 0.004279169460655329, - "nauc_mrr_at_1000_std": 0.05010168406176362, - "nauc_mrr_at_100_diff1": -0.35282838896266405, - "nauc_mrr_at_100_max": 0.004246244348126371, - "nauc_mrr_at_100_std": 0.0500103093583224, - "nauc_mrr_at_10_diff1": -0.3544263028903223, - "nauc_mrr_at_10_max": 0.003929367832392052, - "nauc_mrr_at_10_std": 0.0478252958551306, - "nauc_mrr_at_1_diff1": -0.5746988023835726, - "nauc_mrr_at_1_max": 0.029047376939523493, - "nauc_mrr_at_1_std": 0.2870259521668249, - "nauc_mrr_at_20_diff1": -0.35339280008168317, - "nauc_mrr_at_20_max": 0.004175461034327089, - "nauc_mrr_at_20_std": 0.049148248185835845, - "nauc_mrr_at_3_diff1": -0.3591723252785509, - "nauc_mrr_at_3_max": 0.00403104720219049, - "nauc_mrr_at_3_std": 0.0404960811268073, - "nauc_mrr_at_5_diff1": -0.3585645562893135, - "nauc_mrr_at_5_max": 0.0020548539639942804, - "nauc_mrr_at_5_std": 0.045522727902755446, - "nauc_ndcg_at_1000_diff1": -0.2685009406444735, - "nauc_ndcg_at_1000_max": 0.005836111353149481, - "nauc_ndcg_at_1000_std": 0.11311294743168826, - "nauc_ndcg_at_100_diff1": -0.2626615425913708, - "nauc_ndcg_at_100_max": 0.006500040666350577, - "nauc_ndcg_at_100_std": 0.10412717822836703, - "nauc_ndcg_at_10_diff1": -0.26986440689503255, - "nauc_ndcg_at_10_max": 0.002259064902287912, - "nauc_ndcg_at_10_std": 0.08351960465910635, - "nauc_ndcg_at_1_diff1": -0.5746988023835727, - "nauc_ndcg_at_1_max": 0.01860068455415359, - "nauc_ndcg_at_1_std": 0.24591999227187353, - 
"nauc_ndcg_at_20_diff1": -0.26557544406085987, - "nauc_ndcg_at_20_max": 0.0047811746525403785, - "nauc_ndcg_at_20_std": 0.09053231502866856, - "nauc_ndcg_at_3_diff1": -0.3079122639483788, - "nauc_ndcg_at_3_max": 0.001817933912863451, - "nauc_ndcg_at_3_std": 0.05331981166099078, - "nauc_ndcg_at_5_diff1": -0.29161633386458363, - "nauc_ndcg_at_5_max": 0.00323633286273378, - "nauc_ndcg_at_5_std": 0.07076540546903416, - "nauc_precision_at_1000_diff1": -0.13140300790068282, - "nauc_precision_at_1000_max": 0.005922440105917975, - "nauc_precision_at_1000_std": 0.08342438590882045, - "nauc_precision_at_100_diff1": -0.17052567611996527, - "nauc_precision_at_100_max": 0.011026201381103823, - "nauc_precision_at_100_std": 0.09223669236894032, - "nauc_precision_at_10_diff1": -0.23209493838034498, - "nauc_precision_at_10_max": 0.004908243372756972, - "nauc_precision_at_10_std": 0.07007542762563608, - "nauc_precision_at_1_diff1": -0.5746988023835727, - "nauc_precision_at_1_max": 0.01860068455415359, - "nauc_precision_at_1_std": 0.24591999227187353, - "nauc_precision_at_20_diff1": -0.2109568972104465, - "nauc_precision_at_20_max": 0.007714408837495573, - "nauc_precision_at_20_std": 0.07965290786741164, - "nauc_precision_at_3_diff1": -0.2870525654193057, - "nauc_precision_at_3_max": -0.0007584139080774421, - "nauc_precision_at_3_std": 0.0318750842480405, - "nauc_precision_at_5_diff1": -0.25948022131281406, - "nauc_precision_at_5_max": 0.004692362608818381, - "nauc_precision_at_5_std": 0.04879698010109281, - "nauc_recall_at_1000_diff1": -0.12358858307301535, - "nauc_recall_at_1000_max": 0.009306080801565777, - "nauc_recall_at_1000_std": 0.15591617596817545, - "nauc_recall_at_100_diff1": -0.14715964976867443, - "nauc_recall_at_100_max": 0.01041920443322213, - "nauc_recall_at_100_std": 0.11606682272338587, - "nauc_recall_at_10_diff1": -0.18845161575089034, - "nauc_recall_at_10_max": 0.000650022994426268, - "nauc_recall_at_10_std": 0.07614315809536469, - "nauc_recall_at_1_diff1": 
-0.5746988023835725, - "nauc_recall_at_1_max": 0.009513570955681795, - "nauc_recall_at_1_std": 0.24050607736711668, - "nauc_recall_at_20_diff1": -0.17338195050795077, - "nauc_recall_at_20_max": 0.006581585832690798, - "nauc_recall_at_20_std": 0.08663740977783436, - "nauc_recall_at_3_diff1": -0.19307909748063756, - "nauc_recall_at_3_max": 0.0003553588908448416, - "nauc_recall_at_3_std": 0.0442289537166574, - "nauc_recall_at_5_diff1": -0.19899176174509764, - "nauc_recall_at_5_max": -0.0005095021106718202, - "nauc_recall_at_5_std": 0.060914430592624226, - "ndcg_at_1": 0.03583, - "ndcg_at_10": 0.27789, - "ndcg_at_100": 0.32166, - "ndcg_at_1000": 0.35384, - "ndcg_at_20": 0.29399, - "ndcg_at_3": 0.26171, - "ndcg_at_5": 0.27031, - "precision_at_1": 0.03583, - "precision_at_10": 0.13151, - "precision_at_100": 0.01932, - "precision_at_1000": 0.0027, - "precision_at_20": 0.07531, - "precision_at_3": 0.26493, - "precision_at_5": 0.21197, - "recall_at_1": 0.01743, - "recall_at_10": 0.3639, - "recall_at_100": 0.50674, - "recall_at_1000": 0.67803, - "recall_at_20": 0.40787, - "recall_at_3": 0.24952, - "recall_at_5": 0.30796 ->>>>>>> d5bfecea0b9a038c9d004b1f061ee681f25aede6 } ] }, From 2cc7ffeaff4503a82de03dccb312d3cf4f53651d Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Wed, 25 Sep 2024 12:05:10 +0100 Subject: [PATCH 31/73] make lint --- mteb/evaluation/evaluators/Image/Any2AnyRetrievalEvaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/evaluation/evaluators/Image/Any2AnyRetrievalEvaluator.py b/mteb/evaluation/evaluators/Image/Any2AnyRetrievalEvaluator.py index 3b96babb81..de41731d73 100644 --- a/mteb/evaluation/evaluators/Image/Any2AnyRetrievalEvaluator.py +++ b/mteb/evaluation/evaluators/Image/Any2AnyRetrievalEvaluator.py @@ -70,7 +70,7 @@ def __init__( self, model: EncoderWithQueryCorpusEncode, encode_kwargs: dict[str, Any] = {}, - corpus_chunk_size: int = 20000, + corpus_chunk_size: int = 2000, previous_results: str | None = None, 
**kwargs: Any, ): From d3f9db2b294ded7b59cb88494f94d6d04cf3ed3c Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Wed, 25 Sep 2024 12:11:01 +0100 Subject: [PATCH 32/73] reset corpus chunk size --- mteb/evaluation/evaluators/Image/Any2AnyRetrievalEvaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/evaluation/evaluators/Image/Any2AnyRetrievalEvaluator.py b/mteb/evaluation/evaluators/Image/Any2AnyRetrievalEvaluator.py index de41731d73..3b96babb81 100644 --- a/mteb/evaluation/evaluators/Image/Any2AnyRetrievalEvaluator.py +++ b/mteb/evaluation/evaluators/Image/Any2AnyRetrievalEvaluator.py @@ -70,7 +70,7 @@ def __init__( self, model: EncoderWithQueryCorpusEncode, encode_kwargs: dict[str, Any] = {}, - corpus_chunk_size: int = 2000, + corpus_chunk_size: int = 20000, previous_results: str | None = None, **kwargs: Any, ): From b94e4e2c1bd7308043a5dbf0bc171e549cef2abe Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 26 Sep 2024 02:18:13 +0100 Subject: [PATCH 33/73] remove wrong classification import --- mteb/tasks/Image/ImageClassification/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mteb/tasks/Image/ImageClassification/__init__.py b/mteb/tasks/Image/ImageClassification/__init__.py index e658c625e5..c5a82f357d 100644 --- a/mteb/tasks/Image/ImageClassification/__init__.py +++ b/mteb/tasks/Image/ImageClassification/__init__.py @@ -11,7 +11,6 @@ from .eng.Food101Classification import * from .eng.GTSRBClassification import * from .eng.Imagenet1k import * -from .eng.Kinetics700Classification import * from .eng.MNISTClassification import * from .eng.OxfordFlowersClassification import * from .eng.OxfordPetsClassification import * From b54c227d93fe14ac4b5b1ee0a3cf7f943caa9ec6 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 27 Sep 2024 12:36:54 +0100 Subject: [PATCH 34/73] add Flickr30k T2I and I2T --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 2 + .../eng/Flickr30kI2TRetrieval.py | 43 ++++ 
.../Any2AnyRetrieval/eng/METI2IRetrieval.py | 2 +- .../Fashion200kI2TRetrieval.json | 186 ++++++++++++++++++ .../MNIST.json | 48 +++++ .../MNISTZeroShot.json | 19 ++ .../NIGHTSI2IRetrieval.json | 186 ++++++++++++++++++ .../OxfordFlowersClassification.json | 48 +++++ .../RenderedSST2.json | 19 ++ .../model_meta.json | 1 + .../RenderedSST2.json | 19 ++ .../model_meta.json | 1 + .../MNIST.json | 48 +++++ .../NIGHTSI2IRetrieval.json | 186 ++++++++++++++++++ .../OxfordFlowersClassification.json | 48 +++++ .../RenderedSST2.json | 19 ++ .../model_meta.json | 1 + .../MNISTZeroShot.json | 19 ++ .../Flickr30kI2TRetrieval.json | 186 ++++++++++++++++++ 19 files changed, 1080 insertions(+), 1 deletion(-) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py create mode 100644 results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/Fashion200kI2TRetrieval.json create mode 100644 results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/MNIST.json create mode 100644 results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/MNISTZeroShot.json create mode 100644 results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/NIGHTSI2IRetrieval.json create mode 100644 results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/OxfordFlowersClassification.json create mode 100644 results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/RenderedSST2.json create mode 100644 results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/model_meta.json create mode 100644 results-mieb/Salesforce__blip-itm-large-coco/fef05cafc05298067cbbca00b125749394a77a6f/RenderedSST2.json create mode 100644 results-mieb/Salesforce__blip-itm-large-coco/fef05cafc05298067cbbca00b125749394a77a6f/model_meta.json create mode 100644 
results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/MNIST.json create mode 100644 results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/NIGHTSI2IRetrieval.json create mode 100644 results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/OxfordFlowersClassification.json create mode 100644 results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/RenderedSST2.json create mode 100644 results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/model_meta.json create mode 100644 results-mieb/Salesforce__blip2-opt-2.7b/MNISTZeroShot.json create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kI2TRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 69f31aeb1b..e1241420f9 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -6,6 +6,8 @@ from .eng.Fashion200kT2IRetrieval import * from .eng.FashionIQIT2IRetrieval import * from .eng.FORBI2IRetrieval import * +from .eng.Flickr30kI2TRetrieval import * +from .eng.Flickr30kT2IRetrieval import * from .eng.HatefulMemesI2TRetrieval import * from .eng.HatefulMemesT2IRetrieval import * from .eng.InfoSeekIT2ITRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py new file mode 100644 index 0000000000..06cc32911b --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class Flickr30kI2TRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="Flickr30kI2TRetrieval", + description="Retrieve 
captions based on images.", + reference="https://www.semanticscholar.org/paper/From-image-descriptions-to-visual-denotations%3A-New-Young-Lai/44040913380206991b1991daf1192942e038fe31", + dataset={ + "path": "JamieSJS/flickr30k", + "revision": "a4cf34ac79215f9e2cd6a10342d84f606fc41cc3", + }, + type="Retrieval", + category="i2t", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2018-01-01", "2018-12-31"), + form=["written"], + domains=["Web"], + task_subtypes=["Image Text Retrieval"], + license="CC BY-SA 4.0", + socioeconomic_status="medium", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="found", + bibtex_citation="""@article{Young2014FromID, + title={From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions}, + author={Peter Young and Alice Lai and Micah Hodosh and J. Hockenmaier}, + journal={Transactions of the Association for Computational Linguistics}, + year={2014}, + volume={2}, + pages={67-78}, + url={https://api.semanticscholar.org/CorpusID:3104920} +}""", + descriptive_stats={ + "n_samples": {"default": 155070}, # qrels + }, + ) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py index 14fa576651..010e9a2849 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py @@ -11,7 +11,7 @@ class METI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://arxiv.org/abs/2202.01747", dataset={ "path": "JamieSJS/met", - "revision": "4e6db807efadebc1591332c3f5734f545b3dd258", + "revision": "0603c2ff57f406191e1510d15d3de82db2f720c9", }, type="Retrieval", category="i2i", diff --git a/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/Fashion200kI2TRetrieval.json 
b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/Fashion200kI2TRetrieval.json new file mode 100644 index 0000000000..9225a6062a --- /dev/null +++ b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/Fashion200kI2TRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "96a313715ecf67f5dfe70c4fa52406bc7bdfbeee", + "evaluation_time": 181.5803382396698, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.03886, + "cv_recall_at_10": 0.15484, + "cv_recall_at_100": 0.39701, + "cv_recall_at_1000": 0.7658, + "cv_recall_at_20": 0.21334, + "cv_recall_at_3": 0.07568, + "cv_recall_at_5": 0.1035, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.08809, + "map_at_1": 0.03886, + "map_at_10": 0.06791, + "map_at_100": 0.07632, + "map_at_1000": 0.07774, + "map_at_20": 0.07197, + "map_at_3": 0.05489, + "map_at_5": 0.06124, + "mrr_at_1": 0.03886275311924729, + "mrr_at_10": 0.06791249874191167, + "mrr_at_100": 0.07631679834894635, + "mrr_at_1000": 0.07773539145986881, + "mrr_at_20": 0.0719660846478045, + "mrr_at_3": 0.05488511624735805, + "mrr_at_5": 0.06123610827026652, + "nauc_cv_recall_at_1000_diff1": 0.0218733952063869, + "nauc_cv_recall_at_1000_max": 0.16706592598167, + "nauc_cv_recall_at_1000_std": 0.33597938688032986, + "nauc_cv_recall_at_100_diff1": 0.07193545061359337, + "nauc_cv_recall_at_100_max": 0.03727221469537029, + "nauc_cv_recall_at_100_std": 0.1561269787000631, + "nauc_cv_recall_at_10_diff1": 0.0797701915257812, + "nauc_cv_recall_at_10_max": 0.020430887616611652, + "nauc_cv_recall_at_10_std": 0.06318639559887353, + "nauc_cv_recall_at_1_diff1": 0.18888757745716797, + "nauc_cv_recall_at_1_max": 0.03510598124425557, + "nauc_cv_recall_at_1_std": -0.0011818918393113542, + "nauc_cv_recall_at_20_diff1": 0.07098442271206883, + "nauc_cv_recall_at_20_max": 0.017202068195442434, + "nauc_cv_recall_at_20_std": 
0.09427426491566825, + "nauc_cv_recall_at_3_diff1": 0.14514396355155457, + "nauc_cv_recall_at_3_max": 0.022088685493555183, + "nauc_cv_recall_at_3_std": 0.02219092083634193, + "nauc_cv_recall_at_5_diff1": 0.11843084765533575, + "nauc_cv_recall_at_5_max": 0.028181652993821218, + "nauc_cv_recall_at_5_std": 0.03378133752500267, + "nauc_map_at_1000_diff1": 0.1318292645003046, + "nauc_map_at_1000_max": 0.025968459599028312, + "nauc_map_at_1000_std": 0.03596161753365205, + "nauc_map_at_100_diff1": 0.13207865359812196, + "nauc_map_at_100_max": 0.02503274769433611, + "nauc_map_at_100_std": 0.034811909972038466, + "nauc_map_at_10_diff1": 0.13675377201318661, + "nauc_map_at_10_max": 0.023988302044714947, + "nauc_map_at_10_std": 0.024038623147938202, + "nauc_map_at_1_diff1": 0.18888757745716797, + "nauc_map_at_1_max": 0.03510598124425557, + "nauc_map_at_1_std": -0.0011818918393113542, + "nauc_map_at_20_diff1": 0.13291861173066144, + "nauc_map_at_20_max": 0.02328223207465244, + "nauc_map_at_20_std": 0.030123558300912807, + "nauc_map_at_3_diff1": 0.161277155146534, + "nauc_map_at_3_max": 0.024402585884692875, + "nauc_map_at_3_std": 0.010083727677318252, + "nauc_map_at_5_diff1": 0.1491311529395408, + "nauc_map_at_5_max": 0.026749124262949692, + "nauc_map_at_5_std": 0.015564562618014838, + "nauc_mrr_at_1000_diff1": 0.13183050469281174, + "nauc_mrr_at_1000_max": 0.025968889811347606, + "nauc_mrr_at_1000_std": 0.035962054245207556, + "nauc_mrr_at_100_diff1": 0.13207988989141017, + "nauc_mrr_at_100_max": 0.02503317589140467, + "nauc_mrr_at_100_std": 0.03481234450834155, + "nauc_mrr_at_10_diff1": 0.13675377201318661, + "nauc_mrr_at_10_max": 0.023988302044714947, + "nauc_mrr_at_10_std": 0.024038623147938202, + "nauc_mrr_at_1_diff1": 0.18888757745716797, + "nauc_mrr_at_1_max": 0.03510598124425557, + "nauc_mrr_at_1_std": -0.0011818918393113542, + "nauc_mrr_at_20_diff1": 0.13291861173066144, + "nauc_mrr_at_20_max": 0.02328223207465244, + "nauc_mrr_at_20_std": 0.030123558300912807, + 
"nauc_mrr_at_3_diff1": 0.161277155146534, + "nauc_mrr_at_3_max": 0.024402585884692875, + "nauc_mrr_at_3_std": 0.010083727677318252, + "nauc_mrr_at_5_diff1": 0.1491311529395408, + "nauc_mrr_at_5_max": 0.026749124262949692, + "nauc_mrr_at_5_std": 0.015564562618014838, + "nauc_ndcg_at_1000_diff1": 0.10002387998068846, + "nauc_ndcg_at_1000_max": 0.04997596715187386, + "nauc_ndcg_at_1000_std": 0.10395875200499329, + "nauc_ndcg_at_100_diff1": 0.10528225366714257, + "nauc_ndcg_at_100_max": 0.028766853087252058, + "nauc_ndcg_at_100_std": 0.08065845933284391, + "nauc_ndcg_at_10_diff1": 0.11527617893060818, + "nauc_ndcg_at_10_max": 0.022532453546031717, + "nauc_ndcg_at_10_std": 0.038512971156843226, + "nauc_ndcg_at_1_diff1": 0.18888757745716797, + "nauc_ndcg_at_1_max": 0.03510598124425557, + "nauc_ndcg_at_1_std": -0.0011818918393113542, + "nauc_ndcg_at_20_diff1": 0.10745173167011737, + "nauc_ndcg_at_20_max": 0.02078812380101046, + "nauc_ndcg_at_20_std": 0.05495270418039658, + "nauc_ndcg_at_3_diff1": 0.15602598314224742, + "nauc_ndcg_at_3_max": 0.023546212034123336, + "nauc_ndcg_at_3_std": 0.013879753309827844, + "nauc_ndcg_at_5_diff1": 0.13832101043102465, + "nauc_ndcg_at_5_max": 0.027109131323799444, + "nauc_ndcg_at_5_std": 0.021889859067966443, + "nauc_precision_at_1000_diff1": 0.02187339520638577, + "nauc_precision_at_1000_max": 0.1670659259816692, + "nauc_precision_at_1000_std": 0.3359793868803299, + "nauc_precision_at_100_diff1": 0.0719354506135936, + "nauc_precision_at_100_max": 0.03727221469537065, + "nauc_precision_at_100_std": 0.15612697870006317, + "nauc_precision_at_10_diff1": 0.07977019152578149, + "nauc_precision_at_10_max": 0.020430887616611822, + "nauc_precision_at_10_std": 0.06318639559887362, + "nauc_precision_at_1_diff1": 0.18888757745716797, + "nauc_precision_at_1_max": 0.03510598124425557, + "nauc_precision_at_1_std": -0.0011818918393113542, + "nauc_precision_at_20_diff1": 0.0709844227120689, + "nauc_precision_at_20_max": 0.017202068195442757, + 
"nauc_precision_at_20_std": 0.09427426491566834, + "nauc_precision_at_3_diff1": 0.14514396355155454, + "nauc_precision_at_3_max": 0.022088685493555214, + "nauc_precision_at_3_std": 0.022190920836341964, + "nauc_precision_at_5_diff1": 0.11843084765533585, + "nauc_precision_at_5_max": 0.028181652993821266, + "nauc_precision_at_5_std": 0.03378133752500258, + "nauc_recall_at_1000_diff1": 0.0218733952063869, + "nauc_recall_at_1000_max": 0.16706592598167, + "nauc_recall_at_1000_std": 0.33597938688032986, + "nauc_recall_at_100_diff1": 0.07193545061359337, + "nauc_recall_at_100_max": 0.03727221469537029, + "nauc_recall_at_100_std": 0.1561269787000631, + "nauc_recall_at_10_diff1": 0.0797701915257812, + "nauc_recall_at_10_max": 0.020430887616611652, + "nauc_recall_at_10_std": 0.06318639559887353, + "nauc_recall_at_1_diff1": 0.18888757745716797, + "nauc_recall_at_1_max": 0.03510598124425557, + "nauc_recall_at_1_std": -0.0011818918393113542, + "nauc_recall_at_20_diff1": 0.07098442271206883, + "nauc_recall_at_20_max": 0.017202068195442434, + "nauc_recall_at_20_std": 0.09427426491566825, + "nauc_recall_at_3_diff1": 0.14514396355155457, + "nauc_recall_at_3_max": 0.022088685493555183, + "nauc_recall_at_3_std": 0.02219092083634193, + "nauc_recall_at_5_diff1": 0.11843084765533575, + "nauc_recall_at_5_max": 0.028181652993821218, + "nauc_recall_at_5_std": 0.03378133752500267, + "ndcg_at_1": 0.03886, + "ndcg_at_10": 0.08809, + "ndcg_at_100": 0.13619, + "ndcg_at_1000": 0.18105, + "ndcg_at_20": 0.10288, + "ndcg_at_3": 0.06022, + "ndcg_at_5": 0.07167, + "precision_at_1": 0.03886, + "precision_at_10": 0.01548, + "precision_at_100": 0.00397, + "precision_at_1000": 0.00077, + "precision_at_20": 0.01067, + "precision_at_3": 0.02523, + "precision_at_5": 0.0207, + "recall_at_1": 0.03886, + "recall_at_10": 0.15484, + "recall_at_100": 0.39701, + "recall_at_1000": 0.7658, + "recall_at_20": 0.21334, + "recall_at_3": 0.07568, + "recall_at_5": 0.1035 + } + ] + }, + "task_name": 
"Fashion200kI2TRetrieval" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/MNIST.json b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/MNIST.json new file mode 100644 index 0000000000..e0ac824a95 --- /dev/null +++ b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/MNIST.json @@ -0,0 +1,48 @@ +{ + "dataset_revision": "b06aab39e05f7bcd9635d18ed25d06eae523c574", + "evaluation_time": 297.4648160934448, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.85734, + "f1": 0.8534562890978588, + "f1_weighted": 0.8556807192174982, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.85734, + "scores_per_experiment": [ + { + "accuracy": 0.8733, + "f1": 0.8705401169868281, + "f1_weighted": 0.8720762577071195 + }, + { + "accuracy": 0.8022, + "f1": 0.7946331419529729, + "f1_weighted": 0.7995418875379899 + }, + { + "accuracy": 0.8581, + "f1": 0.8542121810766361, + "f1_weighted": 0.8563303310872925 + }, + { + "accuracy": 0.878, + "f1": 0.8754710153202587, + "f1_weighted": 0.8767339121591414 + }, + { + "accuracy": 0.8751, + "f1": 0.8724249901525976, + "f1_weighted": 0.8737212075959484 + } + ] + } + ] + }, + "task_name": "MNIST" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/MNISTZeroShot.json b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/MNISTZeroShot.json new file mode 100644 index 0000000000..b058c0b7a3 --- /dev/null +++ b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/MNISTZeroShot.json @@ -0,0 +1,19 @@ +{ + "dataset_revision": "b06aab39e05f7bcd9635d18ed25d06eae523c574", + "evaluation_time": 231.2214720249176, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { 
+ "accuracy": 0.5762, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.5762 + } + ] + }, + "task_name": "MNISTZeroShot" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/NIGHTSI2IRetrieval.json b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/NIGHTSI2IRetrieval.json new file mode 100644 index 0000000000..238904b1c3 --- /dev/null +++ b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/NIGHTSI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "c9583e052be7ad52d870c62a207a2e887ba9b8aa", + "evaluation_time": 1032.8710505962372, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.07925, + "cv_recall_at_10": 0.48821, + "cv_recall_at_100": 0.9316, + "cv_recall_at_1000": 0.99481, + "cv_recall_at_20": 0.69764, + "cv_recall_at_3": 0.20142, + "cv_recall_at_5": 0.30708, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.24968, + "map_at_1": 0.07925, + "map_at_10": 0.17764, + "map_at_100": 0.19909, + "map_at_1000": 0.19941, + "map_at_20": 0.1922, + "map_at_3": 0.12995, + "map_at_5": 0.15373, + "mrr_at_1": 0.07924528301886792, + "mrr_at_10": 0.1776362683438151, + "mrr_at_100": 0.19909399968942385, + "mrr_at_1000": 0.19940749680294112, + "mrr_at_20": 0.1921998785828074, + "mrr_at_3": 0.12995283018867934, + "mrr_at_5": 0.15372641509433924, + "nauc_cv_recall_at_1000_diff1": -0.6306765130294512, + "nauc_cv_recall_at_1000_max": 0.4255156608097682, + "nauc_cv_recall_at_1000_std": 0.8273066802478575, + "nauc_cv_recall_at_100_diff1": -0.20477156379793215, + "nauc_cv_recall_at_100_max": 0.3289094948324168, + "nauc_cv_recall_at_100_std": 0.3540262081844245, + "nauc_cv_recall_at_10_diff1": -0.0002766589364904998, + "nauc_cv_recall_at_10_max": 0.05573488312411784, + "nauc_cv_recall_at_10_std": -0.08384768736288326, + 
"nauc_cv_recall_at_1_diff1": 0.0778329308648601, + "nauc_cv_recall_at_1_max": 0.017467451160770153, + "nauc_cv_recall_at_1_std": -0.09230422783548616, + "nauc_cv_recall_at_20_diff1": -0.03663843577705176, + "nauc_cv_recall_at_20_max": 0.09299368334901667, + "nauc_cv_recall_at_20_std": -0.06832906783953004, + "nauc_cv_recall_at_3_diff1": 0.031859526758706465, + "nauc_cv_recall_at_3_max": 0.0014239615902498597, + "nauc_cv_recall_at_3_std": -0.11472573740325614, + "nauc_cv_recall_at_5_diff1": 0.008440048599030739, + "nauc_cv_recall_at_5_max": 0.007294935526259778, + "nauc_cv_recall_at_5_std": -0.09334608645063842, + "nauc_map_at_1000_diff1": 0.03606146943658301, + "nauc_map_at_1000_max": 0.023462582774634224, + "nauc_map_at_1000_std": -0.09234216583449868, + "nauc_map_at_100_diff1": 0.035779413386981485, + "nauc_map_at_100_max": 0.0238874941734106, + "nauc_map_at_100_std": -0.09185556818208644, + "nauc_map_at_10_diff1": 0.03806883475990293, + "nauc_map_at_10_max": 0.020307608194289003, + "nauc_map_at_10_std": -0.09595197128913463, + "nauc_map_at_1_diff1": 0.0778329308648601, + "nauc_map_at_1_max": 0.017467451160770153, + "nauc_map_at_1_std": -0.09230422783548616, + "nauc_map_at_20_diff1": 0.03544017675471238, + "nauc_map_at_20_max": 0.02225673468383308, + "nauc_map_at_20_std": -0.09543789353271984, + "nauc_map_at_3_diff1": 0.05000879822749758, + "nauc_map_at_3_max": 0.00721164104466641, + "nauc_map_at_3_std": -0.10583005601628986, + "nauc_map_at_5_diff1": 0.04007888239793958, + "nauc_map_at_5_max": 0.009848450000869181, + "nauc_map_at_5_std": -0.09855947438234418, + "nauc_mrr_at_1000_diff1": 0.03606146943658301, + "nauc_mrr_at_1000_max": 0.023462582774634224, + "nauc_mrr_at_1000_std": -0.09234216583449868, + "nauc_mrr_at_100_diff1": 0.035779413386981485, + "nauc_mrr_at_100_max": 0.0238874941734106, + "nauc_mrr_at_100_std": -0.09185556818208644, + "nauc_mrr_at_10_diff1": 0.03806883475990293, + "nauc_mrr_at_10_max": 0.020307608194289003, + "nauc_mrr_at_10_std": 
-0.09595197128913463, + "nauc_mrr_at_1_diff1": 0.0778329308648601, + "nauc_mrr_at_1_max": 0.017467451160770153, + "nauc_mrr_at_1_std": -0.09230422783548616, + "nauc_mrr_at_20_diff1": 0.03544017675471238, + "nauc_mrr_at_20_max": 0.02225673468383308, + "nauc_mrr_at_20_std": -0.09543789353271984, + "nauc_mrr_at_3_diff1": 0.05000879822749758, + "nauc_mrr_at_3_max": 0.00721164104466641, + "nauc_mrr_at_3_std": -0.10583005601628986, + "nauc_mrr_at_5_diff1": 0.04007888239793958, + "nauc_mrr_at_5_max": 0.009848450000869181, + "nauc_mrr_at_5_std": -0.09855947438234418, + "nauc_ndcg_at_1000_diff1": 0.023142396023156372, + "nauc_ndcg_at_1000_max": 0.035375916635258325, + "nauc_ndcg_at_1000_std": -0.07958245047320736, + "nauc_ndcg_at_100_diff1": 0.015119475959438647, + "nauc_ndcg_at_100_max": 0.04731669040790856, + "nauc_ndcg_at_100_std": -0.0635412170264141, + "nauc_ndcg_at_10_diff1": 0.025144530735346976, + "nauc_ndcg_at_10_max": 0.03090893341127276, + "nauc_ndcg_at_10_std": -0.09226635650097872, + "nauc_ndcg_at_1_diff1": 0.0778329308648601, + "nauc_ndcg_at_1_max": 0.017467451160770153, + "nauc_ndcg_at_1_std": -0.09230422783548616, + "nauc_ndcg_at_20_diff1": 0.016460308621990286, + "nauc_ndcg_at_20_max": 0.038935887157304706, + "nauc_ndcg_at_20_std": -0.08916836990042935, + "nauc_ndcg_at_3_diff1": 0.04402254814866491, + "nauc_ndcg_at_3_max": 0.005282526107844626, + "nauc_ndcg_at_3_std": -0.10876252624429479, + "nauc_ndcg_at_5_diff1": 0.029223974622285064, + "nauc_ndcg_at_5_max": 0.0089255171139576, + "nauc_ndcg_at_5_std": -0.09706274219479245, + "nauc_precision_at_1000_diff1": -0.6306765130294122, + "nauc_precision_at_1000_max": 0.42551566080976494, + "nauc_precision_at_1000_std": 0.8273066802478539, + "nauc_precision_at_100_diff1": -0.20477156379793393, + "nauc_precision_at_100_max": 0.3289094948324158, + "nauc_precision_at_100_std": 0.3540262081844231, + "nauc_precision_at_10_diff1": -0.00027665893649036046, + "nauc_precision_at_10_max": 0.055734883124117916, + 
"nauc_precision_at_10_std": -0.08384768736288313, + "nauc_precision_at_1_diff1": 0.0778329308648601, + "nauc_precision_at_1_max": 0.017467451160770153, + "nauc_precision_at_1_std": -0.09230422783548616, + "nauc_precision_at_20_diff1": -0.036638435777051476, + "nauc_precision_at_20_max": 0.09299368334901673, + "nauc_precision_at_20_std": -0.06832906783952929, + "nauc_precision_at_3_diff1": 0.03185952675870671, + "nauc_precision_at_3_max": 0.0014239615902501407, + "nauc_precision_at_3_std": -0.11472573740325583, + "nauc_precision_at_5_diff1": 0.00844004859903081, + "nauc_precision_at_5_max": 0.0072949355262599425, + "nauc_precision_at_5_std": -0.09334608645063842, + "nauc_recall_at_1000_diff1": -0.6306765130294512, + "nauc_recall_at_1000_max": 0.4255156608097682, + "nauc_recall_at_1000_std": 0.8273066802478575, + "nauc_recall_at_100_diff1": -0.20477156379793215, + "nauc_recall_at_100_max": 0.3289094948324168, + "nauc_recall_at_100_std": 0.3540262081844245, + "nauc_recall_at_10_diff1": -0.0002766589364904998, + "nauc_recall_at_10_max": 0.05573488312411784, + "nauc_recall_at_10_std": -0.08384768736288326, + "nauc_recall_at_1_diff1": 0.0778329308648601, + "nauc_recall_at_1_max": 0.017467451160770153, + "nauc_recall_at_1_std": -0.09230422783548616, + "nauc_recall_at_20_diff1": -0.03663843577705176, + "nauc_recall_at_20_max": 0.09299368334901667, + "nauc_recall_at_20_std": -0.06832906783953004, + "nauc_recall_at_3_diff1": 0.031859526758706465, + "nauc_recall_at_3_max": 0.0014239615902498597, + "nauc_recall_at_3_std": -0.11472573740325614, + "nauc_recall_at_5_diff1": 0.008440048599030739, + "nauc_recall_at_5_max": 0.007294935526259778, + "nauc_recall_at_5_std": -0.09334608645063842, + "ndcg_at_1": 0.07925, + "ndcg_at_10": 0.24968, + "ndcg_at_100": 0.34784, + "ndcg_at_1000": 0.35593, + "ndcg_at_20": 0.30266, + "ndcg_at_3": 0.14817, + "ndcg_at_5": 0.19136, + "precision_at_1": 0.07925, + "precision_at_10": 0.04882, + "precision_at_100": 0.00932, + "precision_at_1000": 
0.00099, + "precision_at_20": 0.03488, + "precision_at_3": 0.06714, + "precision_at_5": 0.06142, + "recall_at_1": 0.07925, + "recall_at_10": 0.48821, + "recall_at_100": 0.9316, + "recall_at_1000": 0.99481, + "recall_at_20": 0.69764, + "recall_at_3": 0.20142, + "recall_at_5": 0.30708 + } + ] + }, + "task_name": "NIGHTSI2IRetrieval" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/OxfordFlowersClassification.json b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/OxfordFlowersClassification.json new file mode 100644 index 0000000000..6b9742156a --- /dev/null +++ b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/OxfordFlowersClassification.json @@ -0,0 +1,48 @@ +{ + "dataset_revision": "a37b1891609c0376fa81eced756e7863e1bd873b", + "evaluation_time": 351.2027907371521, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.8762745098039215, + "f1": 0.8707019249469784, + "f1_weighted": 0.8703980266170556, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.8762745098039215, + "scores_per_experiment": [ + { + "accuracy": 0.8784313725490196, + "f1": 0.8709034893034996, + "f1_weighted": 0.8708997095974342 + }, + { + "accuracy": 0.8823529411764706, + "f1": 0.879309449928005, + "f1_weighted": 0.8791989291482171 + }, + { + "accuracy": 0.865686274509804, + "f1": 0.8593490468264394, + "f1_weighted": 0.858799777548312 + }, + { + "accuracy": 0.8715686274509804, + "f1": 0.866195738562844, + "f1_weighted": 0.8655759996721402 + }, + { + "accuracy": 0.8833333333333333, + "f1": 0.8777519001141039, + "f1_weighted": 0.877515717119174 + } + ] + } + ] + }, + "task_name": "OxfordFlowersClassification" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/RenderedSST2.json 
b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/RenderedSST2.json new file mode 100644 index 0000000000..bce1d97cbb --- /dev/null +++ b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/RenderedSST2.json @@ -0,0 +1,19 @@ +{ + "dataset_revision": "66b9a461eda025201dd147e5f390f5984c33643a", + "evaluation_time": 34.560497760772705, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.49203734211971445, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.49203734211971445 + } + ] + }, + "task_name": "RenderedSST2" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/model_meta.json b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/model_meta.json new file mode 100644 index 0000000000..2445c6c1a5 --- /dev/null +++ b/results-mieb/Salesforce__blip-itm-base-coco/7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f/model_meta.json @@ -0,0 +1 @@ +{"name": "Salesforce/blip-itm-base-coco", "revision": "7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f", "release_date": "2023-08-01", "languages": ["eng_Latn"], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_source": true, "similarity_fn_name": null, "framework": [], "loader": "BLIPModelWrapper"} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip-itm-large-coco/fef05cafc05298067cbbca00b125749394a77a6f/RenderedSST2.json b/results-mieb/Salesforce__blip-itm-large-coco/fef05cafc05298067cbbca00b125749394a77a6f/RenderedSST2.json new file mode 100644 index 0000000000..79964f28b8 --- /dev/null +++ b/results-mieb/Salesforce__blip-itm-large-coco/fef05cafc05298067cbbca00b125749394a77a6f/RenderedSST2.json @@ -0,0 +1,19 @@ +{ + "dataset_revision": "66b9a461eda025201dd147e5f390f5984c33643a", + "evaluation_time": 
76.84005379676819, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.500823723228995, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.500823723228995 + } + ] + }, + "task_name": "RenderedSST2" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip-itm-large-coco/fef05cafc05298067cbbca00b125749394a77a6f/model_meta.json b/results-mieb/Salesforce__blip-itm-large-coco/fef05cafc05298067cbbca00b125749394a77a6f/model_meta.json new file mode 100644 index 0000000000..69acd258db --- /dev/null +++ b/results-mieb/Salesforce__blip-itm-large-coco/fef05cafc05298067cbbca00b125749394a77a6f/model_meta.json @@ -0,0 +1 @@ +{"name": "Salesforce/blip-itm-large-coco", "revision": "fef05cafc05298067cbbca00b125749394a77a6f", "release_date": "2023-08-01", "languages": ["eng_Latn"], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_source": true, "similarity_fn_name": null, "framework": [], "loader": "BLIPModelWrapper"} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/MNIST.json b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/MNIST.json new file mode 100644 index 0000000000..ac83e833d4 --- /dev/null +++ b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/MNIST.json @@ -0,0 +1,48 @@ +{ + "dataset_revision": "b06aab39e05f7bcd9635d18ed25d06eae523c574", + "evaluation_time": 716.1278786659241, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.9285, + "f1": 0.9276672727830793, + "f1_weighted": 0.9282674252781511, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.9285, + "scores_per_experiment": [ + { + "accuracy": 0.9284, + "f1": 0.9278192557850607, + "f1_weighted": 0.9285648611883027 + }, + { + "accuracy": 0.9127, + "f1": 
0.9108312967490848, + "f1_weighted": 0.9117442017991534 + }, + { + "accuracy": 0.9321, + "f1": 0.931401132729107, + "f1_weighted": 0.9320989666694753 + }, + { + "accuracy": 0.9354, + "f1": 0.9350616561525831, + "f1_weighted": 0.9354103679494147 + }, + { + "accuracy": 0.9339, + "f1": 0.9332230224995612, + "f1_weighted": 0.9335187287844099 + } + ] + } + ] + }, + "task_name": "MNIST" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/NIGHTSI2IRetrieval.json b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/NIGHTSI2IRetrieval.json new file mode 100644 index 0000000000..d452d34222 --- /dev/null +++ b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/NIGHTSI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "c9583e052be7ad52d870c62a207a2e887ba9b8aa", + "evaluation_time": 2631.542476415634, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.05943, + "cv_recall_at_10": 0.40142, + "cv_recall_at_100": 0.88679, + "cv_recall_at_1000": 0.99104, + "cv_recall_at_20": 0.59764, + "cv_recall_at_3": 0.16698, + "cv_recall_at_5": 0.24717, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.20235, + "map_at_1": 0.05943, + "map_at_10": 0.14229, + "map_at_100": 0.16407, + "map_at_1000": 0.16457, + "map_at_20": 0.1558, + "map_at_3": 0.10409, + "map_at_5": 0.12215, + "mrr_at_1": 0.059433962264150944, + "mrr_at_10": 0.14228998203054774, + "mrr_at_100": 0.16406642527076057, + "mrr_at_1000": 0.1645732756182432, + "mrr_at_20": 0.15579824218803512, + "mrr_at_3": 0.10408805031446555, + "mrr_at_5": 0.12215408805031443, + "nauc_cv_recall_at_1000_diff1": -0.26463216865694905, + "nauc_cv_recall_at_1000_max": 0.5278883483217763, + "nauc_cv_recall_at_1000_std": 0.7581453634085121, + "nauc_cv_recall_at_100_diff1": 0.014236330594184277, + "nauc_cv_recall_at_100_max": 
0.3171598451327442, + "nauc_cv_recall_at_100_std": 0.37266908975979723, + "nauc_cv_recall_at_10_diff1": -0.002210170307048272, + "nauc_cv_recall_at_10_max": 0.10455296298298879, + "nauc_cv_recall_at_10_std": 0.02712166932863189, + "nauc_cv_recall_at_1_diff1": 0.06336429572276098, + "nauc_cv_recall_at_1_max": 0.07874966460960554, + "nauc_cv_recall_at_1_std": -0.029650640329470464, + "nauc_cv_recall_at_20_diff1": -0.015820807363866755, + "nauc_cv_recall_at_20_max": 0.12973690402299562, + "nauc_cv_recall_at_20_std": 0.04547749025861378, + "nauc_cv_recall_at_3_diff1": -0.0053137478758161424, + "nauc_cv_recall_at_3_max": 0.05351014259433647, + "nauc_cv_recall_at_3_std": -0.003411847027194816, + "nauc_cv_recall_at_5_diff1": 0.0004749523652767695, + "nauc_cv_recall_at_5_max": 0.0679335317764752, + "nauc_cv_recall_at_5_std": -0.0006472184948264357, + "nauc_map_at_1000_diff1": 0.016633966898488534, + "nauc_map_at_1000_max": 0.08068623366557101, + "nauc_map_at_1000_std": -0.0017960556231574202, + "nauc_map_at_100_diff1": 0.01661909546774381, + "nauc_map_at_100_max": 0.0811695484351021, + "nauc_map_at_100_std": -0.0010905676199076005, + "nauc_map_at_10_diff1": 0.017095173553333582, + "nauc_map_at_10_max": 0.07644073407248878, + "nauc_map_at_10_std": -0.007324964157984001, + "nauc_map_at_1_diff1": 0.06336429572276098, + "nauc_map_at_1_max": 0.07874966460960554, + "nauc_map_at_1_std": -0.029650640329470464, + "nauc_map_at_20_diff1": 0.016082386042899254, + "nauc_map_at_20_max": 0.07877478749954128, + "nauc_map_at_20_std": -0.0062248086434086415, + "nauc_map_at_3_diff1": 0.017880266235441065, + "nauc_map_at_3_max": 0.06255089186760056, + "nauc_map_at_3_std": -0.015893007578431876, + "nauc_map_at_5_diff1": 0.018500079315787177, + "nauc_map_at_5_max": 0.06573480071106033, + "nauc_map_at_5_std": -0.01479218234208926, + "nauc_mrr_at_1000_diff1": 0.016633966898488534, + "nauc_mrr_at_1000_max": 0.08068623366557101, + "nauc_mrr_at_1000_std": -0.0017960556231574202, + 
"nauc_mrr_at_100_diff1": 0.01661909546774381, + "nauc_mrr_at_100_max": 0.0811695484351021, + "nauc_mrr_at_100_std": -0.0010905676199076005, + "nauc_mrr_at_10_diff1": 0.017095173553333582, + "nauc_mrr_at_10_max": 0.07644073407248878, + "nauc_mrr_at_10_std": -0.007324964157984001, + "nauc_mrr_at_1_diff1": 0.06336429572276098, + "nauc_mrr_at_1_max": 0.07874966460960554, + "nauc_mrr_at_1_std": -0.029650640329470464, + "nauc_mrr_at_20_diff1": 0.016082386042899254, + "nauc_mrr_at_20_max": 0.07877478749954128, + "nauc_mrr_at_20_std": -0.0062248086434086415, + "nauc_mrr_at_3_diff1": 0.017880266235441065, + "nauc_mrr_at_3_max": 0.06255089186760056, + "nauc_mrr_at_3_std": -0.015893007578431876, + "nauc_mrr_at_5_diff1": 0.018500079315787177, + "nauc_mrr_at_5_max": 0.06573480071106033, + "nauc_mrr_at_5_std": -0.01479218234208926, + "nauc_ndcg_at_1000_diff1": 0.009960736287420825, + "nauc_ndcg_at_1000_max": 0.09531647626699342, + "nauc_ndcg_at_1000_std": 0.020883086622059264, + "nauc_ndcg_at_100_diff1": 0.011572508647446316, + "nauc_ndcg_at_100_max": 0.10937633012181702, + "nauc_ndcg_at_100_std": 0.042541337399181214, + "nauc_ndcg_at_10_diff1": 0.00952815561968612, + "nauc_ndcg_at_10_max": 0.08583341700567972, + "nauc_ndcg_at_10_std": 0.0047805496391580115, + "nauc_ndcg_at_1_diff1": 0.06336429572276098, + "nauc_ndcg_at_1_max": 0.07874966460960554, + "nauc_ndcg_at_1_std": -0.029650640329470464, + "nauc_ndcg_at_20_diff1": 0.005486565789239601, + "nauc_ndcg_at_20_max": 0.09377542572693116, + "nauc_ndcg_at_20_std": 0.009702452362775931, + "nauc_ndcg_at_3_diff1": 0.009923888782701359, + "nauc_ndcg_at_3_max": 0.0594695188640034, + "nauc_ndcg_at_3_std": -0.011751091386382948, + "nauc_ndcg_at_5_diff1": 0.01169898585355613, + "nauc_ndcg_at_5_max": 0.06611397346239468, + "nauc_ndcg_at_5_std": -0.009801904972316896, + "nauc_precision_at_1000_diff1": -0.26463216865691536, + "nauc_precision_at_1000_max": 0.5278883483217904, + "nauc_precision_at_1000_std": 0.7581453634085238, + 
"nauc_precision_at_100_diff1": 0.01423633059418428, + "nauc_precision_at_100_max": 0.31715984513274537, + "nauc_precision_at_100_std": 0.37266908975979857, + "nauc_precision_at_10_diff1": -0.002210170307048316, + "nauc_precision_at_10_max": 0.10455296298298879, + "nauc_precision_at_10_std": 0.027121669328632015, + "nauc_precision_at_1_diff1": 0.06336429572276098, + "nauc_precision_at_1_max": 0.07874966460960554, + "nauc_precision_at_1_std": -0.029650640329470464, + "nauc_precision_at_20_diff1": -0.015820807363866707, + "nauc_precision_at_20_max": 0.12973690402299493, + "nauc_precision_at_20_std": 0.04547749025861332, + "nauc_precision_at_3_diff1": -0.0053137478758161095, + "nauc_precision_at_3_max": 0.05351014259433666, + "nauc_precision_at_3_std": -0.0034118470271947503, + "nauc_precision_at_5_diff1": 0.00047495236527695035, + "nauc_precision_at_5_max": 0.06793353177647533, + "nauc_precision_at_5_std": -0.000647218494826177, + "nauc_recall_at_1000_diff1": -0.26463216865694905, + "nauc_recall_at_1000_max": 0.5278883483217763, + "nauc_recall_at_1000_std": 0.7581453634085121, + "nauc_recall_at_100_diff1": 0.014236330594184277, + "nauc_recall_at_100_max": 0.3171598451327442, + "nauc_recall_at_100_std": 0.37266908975979723, + "nauc_recall_at_10_diff1": -0.002210170307048272, + "nauc_recall_at_10_max": 0.10455296298298879, + "nauc_recall_at_10_std": 0.02712166932863189, + "nauc_recall_at_1_diff1": 0.06336429572276098, + "nauc_recall_at_1_max": 0.07874966460960554, + "nauc_recall_at_1_std": -0.029650640329470464, + "nauc_recall_at_20_diff1": -0.015820807363866755, + "nauc_recall_at_20_max": 0.12973690402299562, + "nauc_recall_at_20_std": 0.04547749025861378, + "nauc_recall_at_3_diff1": -0.0053137478758161424, + "nauc_recall_at_3_max": 0.05351014259433647, + "nauc_recall_at_3_std": -0.003411847027194816, + "nauc_recall_at_5_diff1": 0.0004749523652767695, + "nauc_recall_at_5_max": 0.0679335317764752, + "nauc_recall_at_5_std": -0.0006472184948264357, + "ndcg_at_1": 0.05943, 
+ "ndcg_at_10": 0.20235, + "ndcg_at_100": 0.30717, + "ndcg_at_1000": 0.32046, + "ndcg_at_20": 0.25183, + "ndcg_at_3": 0.12012, + "ndcg_at_5": 0.15292, + "precision_at_1": 0.05943, + "precision_at_10": 0.04014, + "precision_at_100": 0.00887, + "precision_at_1000": 0.00099, + "precision_at_20": 0.02988, + "precision_at_3": 0.05566, + "precision_at_5": 0.04943, + "recall_at_1": 0.05943, + "recall_at_10": 0.40142, + "recall_at_100": 0.88679, + "recall_at_1000": 0.99104, + "recall_at_20": 0.59764, + "recall_at_3": 0.16698, + "recall_at_5": 0.24717 + } + ] + }, + "task_name": "NIGHTSI2IRetrieval" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/OxfordFlowersClassification.json b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/OxfordFlowersClassification.json new file mode 100644 index 0000000000..f49aa0a7d7 --- /dev/null +++ b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/OxfordFlowersClassification.json @@ -0,0 +1,48 @@ +{ + "dataset_revision": "a37b1891609c0376fa81eced756e7863e1bd873b", + "evaluation_time": 684.7742538452148, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.966078431372549, + "f1": 0.9665318717720475, + "f1_weighted": 0.96610958525568, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.966078431372549, + "scores_per_experiment": [ + { + "accuracy": 0.9686274509803922, + "f1": 0.969270715608082, + "f1_weighted": 0.9689539772636104 + }, + { + "accuracy": 0.9696078431372549, + "f1": 0.9698978889543093, + "f1_weighted": 0.9692619935891078 + }, + { + "accuracy": 0.9647058823529412, + "f1": 0.9649920270608272, + "f1_weighted": 0.9646380547095591 + }, + { + "accuracy": 0.9598039215686275, + "f1": 0.9602800604286633, + "f1_weighted": 0.9600010155147554 + }, + { + "accuracy": 0.9676470588235294, + "f1": 0.9682186668083551, + "f1_weighted": 
0.9676928852013674 + } + ] + } + ] + }, + "task_name": "OxfordFlowersClassification" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/RenderedSST2.json b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/RenderedSST2.json new file mode 100644 index 0000000000..c593e878e3 --- /dev/null +++ b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/RenderedSST2.json @@ -0,0 +1,19 @@ +{ + "dataset_revision": "66b9a461eda025201dd147e5f390f5984c33643a", + "evaluation_time": 117.32345771789551, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.5178473366282262, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.5178473366282262 + } + ] + }, + "task_name": "RenderedSST2" +} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/model_meta.json b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/model_meta.json new file mode 100644 index 0000000000..92e563becc --- /dev/null +++ b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/model_meta.json @@ -0,0 +1 @@ +{"name": "Salesforce/blip2-opt-2.7b", "revision": "51572668da0eb669e01a189dc22abe6088589a24", "release_date": "2024-03-22", "languages": ["eng_Latn"], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_source": true, "similarity_fn_name": null, "framework": [], "loader": "blip2_loader"} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip2-opt-2.7b/MNISTZeroShot.json b/results-mieb/Salesforce__blip2-opt-2.7b/MNISTZeroShot.json new file mode 100644 index 0000000000..b0fd292049 --- /dev/null +++ b/results-mieb/Salesforce__blip2-opt-2.7b/MNISTZeroShot.json @@ -0,0 +1,19 @@ +{ + "dataset_revision": 
"b06aab39e05f7bcd9635d18ed25d06eae523c574", + "evaluation_time": 636.4780406951904, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.5049, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.5049 + } + ] + }, + "task_name": "MNISTZeroShot" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kI2TRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kI2TRetrieval.json new file mode 100644 index 0000000000..714a658b14 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kI2TRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "a4cf34ac79215f9e2cd6a10342d84f606fc41cc3", + "evaluation_time": 2905.3177580833435, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.4111, + "cv_recall_at_10": 0.74505, + "cv_recall_at_100": 0.94796, + "cv_recall_at_1000": 0.99607, + "cv_recall_at_20": 0.82443, + "cv_recall_at_3": 0.58196, + "cv_recall_at_5": 0.65757, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.33866, + "map_at_1": 0.08222, + "map_at_10": 0.2347, + "map_at_100": 0.26833, + "map_at_1000": 0.27226, + "map_at_20": 0.25195, + "map_at_3": 0.16585, + "map_at_5": 0.20389, + "mrr_at_1": 0.4111046624105243, + "mrr_at_10": 0.5151111351658093, + "mrr_at_100": 0.5239514165075607, + "mrr_at_1000": 0.5242073041502724, + "mrr_at_20": 0.5206470443576335, + "mrr_at_3": 0.4860761806496125, + "mrr_at_5": 0.503347412996305, + "nauc_cv_recall_at_1000_diff1": 0.18030127835739224, + "nauc_cv_recall_at_1000_max": 0.5581466344401298, + "nauc_cv_recall_at_1000_std": 0.8631168401994536, + "nauc_cv_recall_at_100_diff1": 0.272670704205089, + "nauc_cv_recall_at_100_max": 0.4612079444879557, + "nauc_cv_recall_at_100_std": 
0.5695575488041834, + "nauc_cv_recall_at_10_diff1": 0.28543669447055997, + "nauc_cv_recall_at_10_max": 0.36347027563947776, + "nauc_cv_recall_at_10_std": 0.2385430096241168, + "nauc_cv_recall_at_1_diff1": 0.3784400483602523, + "nauc_cv_recall_at_1_max": 0.3504797969786945, + "nauc_cv_recall_at_1_std": 0.10634115715764078, + "nauc_cv_recall_at_20_diff1": 0.27508492044537997, + "nauc_cv_recall_at_20_max": 0.3714030729491561, + "nauc_cv_recall_at_20_std": 0.2959015251319611, + "nauc_cv_recall_at_3_diff1": 0.31233109201191445, + "nauc_cv_recall_at_3_max": 0.35059358000714524, + "nauc_cv_recall_at_3_std": 0.15142403433656834, + "nauc_cv_recall_at_5_diff1": 0.2974464195367057, + "nauc_cv_recall_at_5_max": 0.35509762440464737, + "nauc_cv_recall_at_5_std": 0.18640081229405794, + "nauc_map_at_1000_diff1": 0.2662124495945085, + "nauc_map_at_1000_max": 0.40406570060222413, + "nauc_map_at_1000_std": 0.1959460147481183, + "nauc_map_at_100_diff1": 0.2659471804759087, + "nauc_map_at_100_max": 0.40363986652542505, + "nauc_map_at_100_std": 0.1947488241847008, + "nauc_map_at_10_diff1": 0.2666647469717836, + "nauc_map_at_10_max": 0.39968979149613193, + "nauc_map_at_10_std": 0.16723592929025385, + "nauc_map_at_1_diff1": 0.37844004836025164, + "nauc_map_at_1_max": 0.3504797969786941, + "nauc_map_at_1_std": 0.1063411571576403, + "nauc_map_at_20_diff1": 0.2650108128343263, + "nauc_map_at_20_max": 0.4008348432286667, + "nauc_map_at_20_std": 0.17971414027637928, + "nauc_map_at_3_diff1": 0.28590676523718034, + "nauc_map_at_3_max": 0.3781596069511839, + "nauc_map_at_3_std": 0.13040101399163928, + "nauc_map_at_5_diff1": 0.27462944218025, + "nauc_map_at_5_max": 0.39938962890452623, + "nauc_map_at_5_std": 0.14980070399423312, + "nauc_mrr_at_1000_diff1": 0.3494548271148969, + "nauc_mrr_at_1000_max": 0.3521197992435183, + "nauc_mrr_at_1000_std": 0.1344856075273458, + "nauc_mrr_at_100_diff1": 0.349406446391289, + "nauc_mrr_at_100_max": 0.35218688878029347, + "nauc_mrr_at_100_std": 
0.1347720116595227, + "nauc_mrr_at_10_diff1": 0.34832734062778187, + "nauc_mrr_at_10_max": 0.35184655254909514, + "nauc_mrr_at_10_std": 0.13412235150155113, + "nauc_mrr_at_1_diff1": 0.3784400483602523, + "nauc_mrr_at_1_max": 0.3504797969786945, + "nauc_mrr_at_1_std": 0.10634115715764078, + "nauc_mrr_at_20_diff1": 0.34874544122097734, + "nauc_mrr_at_20_max": 0.35198594125683075, + "nauc_mrr_at_20_std": 0.1348277834635207, + "nauc_mrr_at_3_diff1": 0.3502458632268556, + "nauc_mrr_at_3_max": 0.3503121014809114, + "nauc_mrr_at_3_std": 0.12461253730902709, + "nauc_mrr_at_5_diff1": 0.3483495916749157, + "nauc_mrr_at_5_max": 0.3512186490276025, + "nauc_mrr_at_5_std": 0.13065045260516017, + "nauc_ndcg_at_1000_diff1": 0.28085910013736953, + "nauc_ndcg_at_1000_max": 0.400173338146715, + "nauc_ndcg_at_1000_std": 0.2454880347293459, + "nauc_ndcg_at_100_diff1": 0.27614261400212725, + "nauc_ndcg_at_100_max": 0.3975804199137475, + "nauc_ndcg_at_100_std": 0.24550705671790238, + "nauc_ndcg_at_10_diff1": 0.2770040634103947, + "nauc_ndcg_at_10_max": 0.386892601870249, + "nauc_ndcg_at_10_std": 0.17593863508925003, + "nauc_ndcg_at_1_diff1": 0.3784400483602523, + "nauc_ndcg_at_1_max": 0.3504797969786945, + "nauc_ndcg_at_1_std": 0.10634115715764078, + "nauc_ndcg_at_20_diff1": 0.27393587281733567, + "nauc_ndcg_at_20_max": 0.3893030504620739, + "nauc_ndcg_at_20_std": 0.1976666061871801, + "nauc_ndcg_at_3_diff1": 0.29659323480247735, + "nauc_ndcg_at_3_max": 0.3710317896505282, + "nauc_ndcg_at_3_std": 0.1331916818585782, + "nauc_ndcg_at_5_diff1": 0.28580433201485833, + "nauc_ndcg_at_5_max": 0.3859484521144905, + "nauc_ndcg_at_5_std": 0.15313433521572764, + "nauc_precision_at_1000_diff1": 0.12752694892887595, + "nauc_precision_at_1000_max": 0.3590535825037547, + "nauc_precision_at_1000_std": 0.6227671512776303, + "nauc_precision_at_100_diff1": 0.18161894144710583, + "nauc_precision_at_100_max": 0.3507884567772516, + "nauc_precision_at_100_std": 0.39162073570098294, + 
"nauc_precision_at_10_diff1": 0.22500485735582146, + "nauc_precision_at_10_max": 0.37378539642037556, + "nauc_precision_at_10_std": 0.2035430362585049, + "nauc_precision_at_1_diff1": 0.3784400483602523, + "nauc_precision_at_1_max": 0.3504797969786945, + "nauc_precision_at_1_std": 0.10634115715764078, + "nauc_precision_at_20_diff1": 0.20677311842444712, + "nauc_precision_at_20_max": 0.3598552830533254, + "nauc_precision_at_20_std": 0.24219283310766115, + "nauc_precision_at_3_diff1": 0.26529469191714483, + "nauc_precision_at_3_max": 0.37325220096807005, + "nauc_precision_at_3_std": 0.14099019547945105, + "nauc_precision_at_5_diff1": 0.24620492543282482, + "nauc_precision_at_5_max": 0.3867515487955738, + "nauc_precision_at_5_std": 0.16846852569552279, + "nauc_recall_at_1000_diff1": 0.1275269489288761, + "nauc_recall_at_1000_max": 0.35905358250375646, + "nauc_recall_at_1000_std": 0.6227671512776317, + "nauc_recall_at_100_diff1": 0.1816189414471066, + "nauc_recall_at_100_max": 0.35078845677725146, + "nauc_recall_at_100_std": 0.3916207357009826, + "nauc_recall_at_10_diff1": 0.22500485735582146, + "nauc_recall_at_10_max": 0.37378539642037556, + "nauc_recall_at_10_std": 0.2035430362585049, + "nauc_recall_at_1_diff1": 0.37844004836025164, + "nauc_recall_at_1_max": 0.3504797969786941, + "nauc_recall_at_1_std": 0.1063411571576403, + "nauc_recall_at_20_diff1": 0.20677311842444712, + "nauc_recall_at_20_max": 0.3598552830533254, + "nauc_recall_at_20_std": 0.24219283310766115, + "nauc_recall_at_3_diff1": 0.2652946919171448, + "nauc_recall_at_3_max": 0.3732522009680695, + "nauc_recall_at_3_std": 0.1409901954794505, + "nauc_recall_at_5_diff1": 0.24620492543282482, + "nauc_recall_at_5_max": 0.3867515487955738, + "nauc_recall_at_5_std": 0.16846852569552279, + "ndcg_at_1": 0.4111, + "ndcg_at_10": 0.33866, + "ndcg_at_100": 0.44523, + "ndcg_at_1000": 0.49782, + "ndcg_at_20": 0.37695, + "ndcg_at_3": 0.33845, + "ndcg_at_5": 0.29078, + "precision_at_1": 0.4111, + "precision_at_10": 
0.17182, + "precision_at_100": 0.03266, + "precision_at_1000": 0.00451, + "precision_at_20": 0.10825, + "precision_at_3": 0.31717, + "precision_at_5": 0.25644, + "recall_at_1": 0.08222, + "recall_at_10": 0.34363, + "recall_at_100": 0.65316, + "recall_at_1000": 0.90261, + "recall_at_20": 0.43299, + "recall_at_3": 0.1903, + "recall_at_5": 0.25644 + } + ] + }, + "task_name": "Flickr30kI2TRetrieval" +} \ No newline at end of file From 08ce105778e473661e8151cb6169ee8fa4348868 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 27 Sep 2024 14:47:24 +0100 Subject: [PATCH 35/73] add Flickr30k T2I retriebal --- .../Flickr30kT2IRetrieval.json | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kT2IRetrieval.json diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kT2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kT2IRetrieval.json new file mode 100644 index 0000000000..60797e4e6b --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kT2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "a4cf34ac79215f9e2cd6a10342d84f606fc41cc3", + "evaluation_time": 3504.4338762760162, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.2175, + "cv_recall_at_10": 0.50718, + "cv_recall_at_100": 0.80839, + "cv_recall_at_1000": 0.96492, + "cv_recall_at_20": 0.60324, + "cv_recall_at_3": 0.34774, + "cv_recall_at_5": 0.41235, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.3504, + "map_at_1": 0.2175, + "map_at_10": 0.3017, + "map_at_100": 0.31355, + "map_at_1000": 0.31429, + "map_at_20": 0.30838, + "map_at_3": 0.2743, + "map_at_5": 0.28901, + "mrr_at_1": 0.21749532469207455, + "mrr_at_10": 0.3017006349410669, + 
"mrr_at_100": 0.3135464607899773, + "mrr_at_1000": 0.31428602518127013, + "mrr_at_20": 0.30838107727990743, + "mrr_at_3": 0.2743008533780535, + "mrr_at_5": 0.28900711506631577, + "nauc_cv_recall_at_1000_diff1": 0.2632892740429544, + "nauc_cv_recall_at_1000_max": 0.4683201344263211, + "nauc_cv_recall_at_1000_std": 0.7033795380897453, + "nauc_cv_recall_at_100_diff1": 0.2647647634897656, + "nauc_cv_recall_at_100_max": 0.34490671196475553, + "nauc_cv_recall_at_100_std": 0.3944496412003796, + "nauc_cv_recall_at_10_diff1": 0.2995130617977379, + "nauc_cv_recall_at_10_max": 0.28870905172779393, + "nauc_cv_recall_at_10_std": 0.13919504988489, + "nauc_cv_recall_at_1_diff1": 0.46334150143887837, + "nauc_cv_recall_at_1_max": 0.3188934433784608, + "nauc_cv_recall_at_1_std": 0.03870010544883344, + "nauc_cv_recall_at_20_diff1": 0.2830079535289093, + "nauc_cv_recall_at_20_max": 0.29612199469809664, + "nauc_cv_recall_at_20_std": 0.19159692443206613, + "nauc_cv_recall_at_3_diff1": 0.3535295484138601, + "nauc_cv_recall_at_3_max": 0.29806929455574155, + "nauc_cv_recall_at_3_std": 0.07658894036209522, + "nauc_cv_recall_at_5_diff1": 0.32210572836847934, + "nauc_cv_recall_at_5_max": 0.2888711825797017, + "nauc_cv_recall_at_5_std": 0.09843675944719517, + "nauc_map_at_1000_diff1": 0.39832464563018605, + "nauc_map_at_1000_max": 0.3074377337641309, + "nauc_map_at_1000_std": 0.07177029946282956, + "nauc_map_at_100_diff1": 0.3980484146317034, + "nauc_map_at_100_max": 0.3074089025502927, + "nauc_map_at_100_std": 0.0721090495580087, + "nauc_map_at_10_diff1": 0.39759464570461306, + "nauc_map_at_10_max": 0.3059467184032683, + "nauc_map_at_10_std": 0.06729002523587067, + "nauc_map_at_1_diff1": 0.46334150143887837, + "nauc_map_at_1_max": 0.3188934433784608, + "nauc_map_at_1_std": 0.03870010544883344, + "nauc_map_at_20_diff1": 0.39724585077726443, + "nauc_map_at_20_max": 0.30662830181613326, + "nauc_map_at_20_std": 0.07029104503264444, + "nauc_map_at_3_diff1": 0.4087813508135372, + 
"nauc_map_at_3_max": 0.3082838424431037, + "nauc_map_at_3_std": 0.05612029637486883, + "nauc_map_at_5_diff1": 0.40044699102255726, + "nauc_map_at_5_max": 0.3058538687154621, + "nauc_map_at_5_std": 0.06168460440199823, + "nauc_mrr_at_1000_diff1": 0.3983251214518209, + "nauc_mrr_at_1000_max": 0.3074398467997274, + "nauc_mrr_at_1000_std": 0.0717737083834719, + "nauc_mrr_at_100_diff1": 0.3980488893862368, + "nauc_mrr_at_100_max": 0.3074110112149868, + "nauc_mrr_at_100_std": 0.07211245146163399, + "nauc_mrr_at_10_diff1": 0.39759509662724174, + "nauc_mrr_at_10_max": 0.3059487942694512, + "nauc_mrr_at_10_std": 0.06729335845497784, + "nauc_mrr_at_1_diff1": 0.46334150143887837, + "nauc_mrr_at_1_max": 0.3188934433784608, + "nauc_mrr_at_1_std": 0.03870010544883344, + "nauc_mrr_at_20_diff1": 0.3972463051807796, + "nauc_mrr_at_20_max": 0.30663039310948675, + "nauc_mrr_at_20_std": 0.07029440252517179, + "nauc_mrr_at_3_diff1": 0.4087846165854502, + "nauc_mrr_at_3_max": 0.3082911152291177, + "nauc_mrr_at_3_std": 0.0561317481536657, + "nauc_mrr_at_5_diff1": 0.40044780533219726, + "nauc_mrr_at_5_max": 0.30585567761349425, + "nauc_mrr_at_5_std": 0.061687450057817825, + "nauc_ndcg_at_1000_diff1": 0.3822651584130574, + "nauc_ndcg_at_1000_max": 0.3096027445146429, + "nauc_ndcg_at_1000_std": 0.09905420670861774, + "nauc_ndcg_at_100_diff1": 0.3751136878938184, + "nauc_ndcg_at_100_max": 0.3096593022400737, + "nauc_ndcg_at_100_std": 0.1114725296151852, + "nauc_ndcg_at_10_diff1": 0.3722441527278821, + "nauc_ndcg_at_10_max": 0.30140653687780977, + "nauc_ndcg_at_10_std": 0.08492137004866748, + "nauc_ndcg_at_1_diff1": 0.46334150143887837, + "nauc_ndcg_at_1_max": 0.3188934433784608, + "nauc_ndcg_at_1_std": 0.03870010544883344, + "nauc_ndcg_at_20_diff1": 0.3702205468461526, + "nauc_ndcg_at_20_max": 0.30363863160972876, + "nauc_ndcg_at_20_std": 0.09630513131602707, + "nauc_ndcg_at_3_diff1": 0.39360911171689245, + "nauc_ndcg_at_3_max": 0.30546309152189277, + "nauc_ndcg_at_3_std": 
0.061672579869672305, + "nauc_ndcg_at_5_diff1": 0.37922470007538506, + "nauc_ndcg_at_5_max": 0.3012774286231696, + "nauc_ndcg_at_5_std": 0.07144813701782884, + "nauc_precision_at_1000_diff1": 0.2632892740429434, + "nauc_precision_at_1000_max": 0.46832013442631265, + "nauc_precision_at_1000_std": 0.7033795380897392, + "nauc_precision_at_100_diff1": 0.26476476348976524, + "nauc_precision_at_100_max": 0.3449067119647543, + "nauc_precision_at_100_std": 0.3944496412003801, + "nauc_precision_at_10_diff1": 0.2995130617977381, + "nauc_precision_at_10_max": 0.28870905172779415, + "nauc_precision_at_10_std": 0.13919504988489012, + "nauc_precision_at_1_diff1": 0.46334150143887837, + "nauc_precision_at_1_max": 0.3188934433784608, + "nauc_precision_at_1_std": 0.03870010544883344, + "nauc_precision_at_20_diff1": 0.2830079535289092, + "nauc_precision_at_20_max": 0.29612199469809636, + "nauc_precision_at_20_std": 0.19159692443206594, + "nauc_precision_at_3_diff1": 0.3535203790281254, + "nauc_precision_at_3_max": 0.29804922704221115, + "nauc_precision_at_3_std": 0.07655745610155888, + "nauc_precision_at_5_diff1": 0.32210572836847917, + "nauc_precision_at_5_max": 0.2888711825797015, + "nauc_precision_at_5_std": 0.09843675944719478, + "nauc_recall_at_1000_diff1": 0.2632892740429544, + "nauc_recall_at_1000_max": 0.4683201344263211, + "nauc_recall_at_1000_std": 0.7033795380897453, + "nauc_recall_at_100_diff1": 0.2647647634897656, + "nauc_recall_at_100_max": 0.34490671196475553, + "nauc_recall_at_100_std": 0.3944496412003796, + "nauc_recall_at_10_diff1": 0.2995130617977379, + "nauc_recall_at_10_max": 0.28870905172779393, + "nauc_recall_at_10_std": 0.13919504988489, + "nauc_recall_at_1_diff1": 0.46334150143887837, + "nauc_recall_at_1_max": 0.3188934433784608, + "nauc_recall_at_1_std": 0.03870010544883344, + "nauc_recall_at_20_diff1": 0.2830079535289093, + "nauc_recall_at_20_max": 0.29612199469809664, + "nauc_recall_at_20_std": 0.19159692443206613, + "nauc_recall_at_3_diff1": 
0.3535203790281251, + "nauc_recall_at_3_max": 0.29804922704221065, + "nauc_recall_at_3_std": 0.07655745610155873, + "nauc_recall_at_5_diff1": 0.32210572836847934, + "nauc_recall_at_5_max": 0.2888711825797017, + "nauc_recall_at_5_std": 0.09843675944719517, + "ndcg_at_1": 0.2175, + "ndcg_at_10": 0.3504, + "ndcg_at_100": 0.41258, + "ndcg_at_1000": 0.43242, + "ndcg_at_20": 0.3747, + "ndcg_at_3": 0.29313, + "ndcg_at_5": 0.31969, + "precision_at_1": 0.2175, + "precision_at_10": 0.05072, + "precision_at_100": 0.00808, + "precision_at_1000": 0.00096, + "precision_at_20": 0.03016, + "precision_at_3": 0.11591, + "precision_at_5": 0.08247, + "recall_at_1": 0.2175, + "recall_at_10": 0.50718, + "recall_at_100": 0.80839, + "recall_at_1000": 0.96492, + "recall_at_20": 0.60324, + "recall_at_3": 0.34773, + "recall_at_5": 0.41235 + } + ] + }, + "task_name": "Flickr30kT2IRetrieval" +} \ No newline at end of file From 5a45bc477881e7101bc87218b341a84441cb164f Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 27 Sep 2024 15:36:16 +0100 Subject: [PATCH 36/73] reduced-size MET revision --- mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py index 010e9a2849..e46b2635e5 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py @@ -11,7 +11,7 @@ class METI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://arxiv.org/abs/2202.01747", dataset={ "path": "JamieSJS/met", - "revision": "0603c2ff57f406191e1510d15d3de82db2f720c9", + "revision": "08ceaa61c0d172214abb3b8e82971d8f69d2aec0", }, type="Retrieval", category="i2i", From 9b8c46d53128207f1adace03e10254759febc343 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Sun, 29 Sep 2024 17:47:57 +0100 Subject: [PATCH 37/73] fix: add Flickr30k T2I --- 
.../eng/Flickr30kT2IRetrieval.py | 50 +++++++++++++++++++ .../MNISTZeroShot.json | 0 2 files changed, 50 insertions(+) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py rename results-mieb/Salesforce__blip2-opt-2.7b/{ => 51572668da0eb669e01a189dc22abe6088589a24}/MNISTZeroShot.json (100%) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py new file mode 100644 index 0000000000..24501f316e --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class Flickr30kT2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="Flickr30kT2IRetrieval", + description="Retrieve images based on captions.", + reference="https://www.semanticscholar.org/paper/From-image-descriptions-to-visual-denotations%3A-New-Young-Lai/44040913380206991b1991daf1192942e038fe31", + dataset={ + "path": "JamieSJS/flickr30k", + "revision": "a4cf34ac79215f9e2cd6a10342d84f606fc41cc3", + }, + type="Retrieval", + category="t2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2018-01-01", "2018-12-31"), + form=["written"], + domains=["Web"], + task_subtypes=["Image Text Retrieval"], + license="CC BY-SA 4.0", + socioeconomic_status="medium", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="found", + bibtex_citation="""@article{Young2014FromID, + title={From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions}, + author={Peter Young and Alice Lai and Micah Hodosh and J. 
Hockenmaier}, + journal={Transactions of the Association for Computational Linguistics}, + year={2014}, + volume={2}, + pages={67-78}, + url={https://api.semanticscholar.org/CorpusID:3104920} +}""", + descriptive_stats={ + "n_samples": {"default": 31014}, # qrels + }, + ) + + def load_data(self, **kwargs): + super().load_data(**kwargs) + # swap corpus and query + for split in kwargs.get("eval_splits", self.metadata_dict["eval_splits"]): + self.queries[split], self.corpus[split] = self.corpus[split], self.queries[split] + self.relevant_docs[split] = {cid: {qid: score} for qid, cid_score in self.relevant_docs[split].items() for cid, score in cid_score.items()} \ No newline at end of file diff --git a/results-mieb/Salesforce__blip2-opt-2.7b/MNISTZeroShot.json b/results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/MNISTZeroShot.json similarity index 100% rename from results-mieb/Salesforce__blip2-opt-2.7b/MNISTZeroShot.json rename to results-mieb/Salesforce__blip2-opt-2.7b/51572668da0eb669e01a189dc22abe6088589a24/MNISTZeroShot.json From 65c2bfbac977b36549f9d494008135ab76331555 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Sun, 29 Sep 2024 17:51:38 +0100 Subject: [PATCH 38/73] make lint --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 2 +- .../Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py | 2 +- .../Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py | 13 ++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index e1241420f9..3e56bdf82c 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -5,9 +5,9 @@ from .eng.Fashion200kI2TRetrieval import * from .eng.Fashion200kT2IRetrieval import * from .eng.FashionIQIT2IRetrieval import * -from .eng.FORBI2IRetrieval import * from .eng.Flickr30kI2TRetrieval import * from .eng.Flickr30kT2IRetrieval import * +from 
.eng.FORBI2IRetrieval import * from .eng.HatefulMemesI2TRetrieval import * from .eng.HatefulMemesT2IRetrieval import * from .eng.InfoSeekIT2ITRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py index 06cc32911b..f7278bcf37 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py @@ -38,6 +38,6 @@ class Flickr30kI2TRetrieval(AbsTaskAny2AnyRetrieval): url={https://api.semanticscholar.org/CorpusID:3104920} }""", descriptive_stats={ - "n_samples": {"default": 155070}, # qrels + "n_samples": {"default": 155070}, # qrels }, ) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py index 24501f316e..44164c90b6 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py @@ -38,7 +38,7 @@ class Flickr30kT2IRetrieval(AbsTaskAny2AnyRetrieval): url={https://api.semanticscholar.org/CorpusID:3104920} }""", descriptive_stats={ - "n_samples": {"default": 31014}, # qrels + "n_samples": {"default": 31014}, # qrels }, ) @@ -46,5 +46,12 @@ def load_data(self, **kwargs): super().load_data(**kwargs) # swap corpus and query for split in kwargs.get("eval_splits", self.metadata_dict["eval_splits"]): - self.queries[split], self.corpus[split] = self.corpus[split], self.queries[split] - self.relevant_docs[split] = {cid: {qid: score} for qid, cid_score in self.relevant_docs[split].items() for cid, score in cid_score.items()} \ No newline at end of file + self.queries[split], self.corpus[split] = ( + self.corpus[split], + self.queries[split], + ) + self.relevant_docs[split] = { + cid: {qid: score} + for qid, cid_score in self.relevant_docs[split].items() + for cid, score in cid_score.items() + } From 
8d79c5b3d50060a3b65a5616cc9773b6c8a36df8 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Tue, 1 Oct 2024 16:34:48 +0100 Subject: [PATCH 39/73] add two landmark datasets and results --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 2 + .../eng/ROxfordI2IRetrieval.py | 41 ++++ .../eng/RParisI2IRetrieval.py | 41 ++++ .../ROxfordI2IRetrieval.json | 186 ++++++++++++++++++ .../RParisI2IRetrieval.json | 186 ++++++++++++++++++ 5 files changed, 456 insertions(+) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordI2IRetrieval.json create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisI2IRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 3e56bdf82c..72505e8a3c 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -20,7 +20,9 @@ from .eng.NIGHTSI2IRetrieval import * from .eng.OVENIT2ITRetrieval import * from .eng.OVENIT2TRetrieval import * +from .eng.ROxfordI2IRetrieval import * from .eng.RP2kI2IRetrieval import * +from .eng.RParisI2IRetrieval import * from .eng.SciMMIRI2TRetrieval import * from .eng.SciMMIRT2IRetrieval import * from .eng.SOPI2IRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py new file mode 100644 index 0000000000..8ed178e605 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ROxfordI2IRetrieval(AbsTaskAny2AnyRetrieval): + 
metadata = TaskMetadata( + name="ROxfordI2IRetrieval", + description="Retrieve photos of landmarks in Oxford, UK.", + reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Oxford_and_CVPR_2018_paper.html", + dataset={ + "path": "JamieSJS/r-oxford", + "revision": "d8daad98b4e4896a7f7fa1b3485a22420242d4fc", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="cv_recall_at_1", + date=("2009-01-01", "2010-04-01"), + domains=["Web"], + task_subtypes=["Object recognition"], + license="Not specified", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@inproceedings{radenovic2018revisiting, + title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5706--5715}, + year={2018} +} + """, + descriptive_stats={ + "n_samples": {"default": 3555537}, + }, + ) + skip_first_result = True diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py new file mode 100644 index 0000000000..df31b527c8 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class RParisI2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="RParisI2IRetrieval", + description="Retrieve photos of landmarks in Paris.", + reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Oxford_and_CVPR_2018_paper.html", + dataset={ + "path": "JamieSJS/r-paris", + "revision": 
"bafc3a08fdffd72558021ce3a41250833d7e0e88", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="cv_recall_at_1", + date=("2009-01-01", "2010-04-01"), + domains=["Web"], + task_subtypes=["Object recognition"], + license="Not specified", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@inproceedings{radenovic2018revisiting, + title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5706--5715}, + year={2018} +} + """, + descriptive_stats={ + "n_samples": {"default": 3555537}, + }, + ) + skip_first_result = True diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordI2IRetrieval.json new file mode 100644 index 0000000000..e47a943219 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "d8daad98b4e4896a7f7fa1b3485a22420242d4fc", + "evaluation_time": 29.71091365814209, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.80946, + "cv_recall_at_10": 0.99953, + "cv_recall_at_100": 1.0, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 1.0, + "cv_recall_at_3": 0.97636, + "cv_recall_at_5": 0.99291, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.80946, + "map_at_1": 0.00083, + "map_at_10": 0.00583, + "map_at_100": 0.04516, + "map_at_1000": 0.27358, + "map_at_20": 0.01067, + "map_at_3": 0.00211, + "map_at_5": 0.00324, + "mrr_at_1": 0.9985815602836879, + 
"mrr_at_10": 0.9992119779353821, + "mrr_at_100": 0.9992119779353821, + "mrr_at_1000": 0.9992119779353821, + "mrr_at_20": 0.9992119779353821, + "mrr_at_3": 0.9992119779353821, + "mrr_at_5": 0.9992119779353821, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": NaN, + "nauc_cv_recall_at_100_max": NaN, + "nauc_cv_recall_at_100_std": NaN, + "nauc_cv_recall_at_10_diff1": 0.5540638703677596, + "nauc_cv_recall_at_10_max": 0.7221878767269868, + "nauc_cv_recall_at_10_std": 0.869246700256431, + "nauc_cv_recall_at_1_diff1": -0.24158864035258856, + "nauc_cv_recall_at_1_max": -0.0420448021592233, + "nauc_cv_recall_at_1_std": 0.04484895202286055, + "nauc_cv_recall_at_20_diff1": NaN, + "nauc_cv_recall_at_20_max": NaN, + "nauc_cv_recall_at_20_std": NaN, + "nauc_cv_recall_at_3_diff1": -0.2813966714480976, + "nauc_cv_recall_at_3_max": -0.04571848249527183, + "nauc_cv_recall_at_3_std": 0.06606763479339506, + "nauc_cv_recall_at_5_diff1": -0.057737057928295976, + "nauc_cv_recall_at_5_max": 0.017855647720182007, + "nauc_cv_recall_at_5_std": 0.08427153819844212, + "nauc_map_at_1000_diff1": -0.3857995513654731, + "nauc_map_at_1000_max": 0.03808456962512866, + "nauc_map_at_1000_std": -0.06663520492889344, + "nauc_map_at_100_diff1": -0.34437629024420324, + "nauc_map_at_100_max": 0.020553049295469906, + "nauc_map_at_100_std": 0.0556790533768489, + "nauc_map_at_10_diff1": -0.18993364966776907, + "nauc_map_at_10_max": 0.006866799821313959, + "nauc_map_at_10_std": 0.13195237855420855, + "nauc_map_at_1_diff1": -0.05431107143472811, + "nauc_map_at_1_max": 0.016884860597154504, + "nauc_map_at_1_std": 0.11466554606849116, + "nauc_map_at_20_diff1": -0.2414863261116838, + "nauc_map_at_20_max": 0.008935096908909975, + "nauc_map_at_20_std": 0.11557906533166638, + "nauc_map_at_3_diff1": -0.12217591294530498, + "nauc_map_at_3_max": 0.008152934040333987, + "nauc_map_at_3_std": 0.13797559913063503, + 
"nauc_map_at_5_diff1": -0.13888462292383946, + "nauc_map_at_5_max": 0.007634414930511178, + "nauc_map_at_5_std": 0.13391202111166425, + "nauc_mrr_at_1000_diff1": 0.9476986801026335, + "nauc_mrr_at_1000_max": 1.0, + "nauc_mrr_at_1000_std": 0.9166563630181881, + "nauc_mrr_at_100_diff1": 0.9476986801026335, + "nauc_mrr_at_100_max": 1.0, + "nauc_mrr_at_100_std": 0.9166563630181881, + "nauc_mrr_at_10_diff1": 0.9476986801026335, + "nauc_mrr_at_10_max": 1.0, + "nauc_mrr_at_10_std": 0.9166563630181881, + "nauc_mrr_at_1_diff1": 0.9564155667521437, + "nauc_mrr_at_1_max": 1.0, + "nauc_mrr_at_1_std": 0.9073959589089956, + "nauc_mrr_at_20_diff1": 0.9476986801026335, + "nauc_mrr_at_20_max": 1.0, + "nauc_mrr_at_20_std": 0.9166563630181881, + "nauc_mrr_at_3_diff1": 0.9476986801026335, + "nauc_mrr_at_3_max": 1.0, + "nauc_mrr_at_3_std": 0.9166563630181881, + "nauc_mrr_at_5_diff1": 0.9476986801026335, + "nauc_mrr_at_5_max": 1.0, + "nauc_mrr_at_5_std": 0.9166563630181881, + "nauc_ndcg_at_1000_diff1": -0.29077931122633116, + "nauc_ndcg_at_1000_max": 0.014960510380277934, + "nauc_ndcg_at_1000_std": -0.12483981180901933, + "nauc_ndcg_at_100_diff1": -0.3609816949819758, + "nauc_ndcg_at_100_max": 0.021419925854651446, + "nauc_ndcg_at_100_std": -0.17265752707444715, + "nauc_ndcg_at_10_diff1": -0.27457750496652206, + "nauc_ndcg_at_10_max": 0.0008466224260104365, + "nauc_ndcg_at_10_std": -0.045841557226464144, + "nauc_ndcg_at_1_diff1": 0.9564155667521437, + "nauc_ndcg_at_1_max": 1.0, + "nauc_ndcg_at_1_std": 0.9073959589089956, + "nauc_ndcg_at_20_diff1": -0.3094417772089175, + "nauc_ndcg_at_20_max": 0.009714640004268442, + "nauc_ndcg_at_20_std": -0.08586776230182966, + "nauc_ndcg_at_3_diff1": -0.23757585938494089, + "nauc_ndcg_at_3_max": -0.016387555353346198, + "nauc_ndcg_at_3_std": 0.030386940725562046, + "nauc_ndcg_at_5_diff1": -0.23196460003691158, + "nauc_ndcg_at_5_max": -0.013062943721282708, + "nauc_ndcg_at_5_std": -0.010500172553175749, + "nauc_precision_at_1000_diff1": 
-0.17619733662463302, + "nauc_precision_at_1000_max": 0.012339801300641195, + "nauc_precision_at_1000_std": -0.1876530479926194, + "nauc_precision_at_100_diff1": -0.3626475300981416, + "nauc_precision_at_100_max": 0.025040872201569076, + "nauc_precision_at_100_std": -0.18897442356689456, + "nauc_precision_at_10_diff1": -0.2782683949086005, + "nauc_precision_at_10_max": 0.006150543325428016, + "nauc_precision_at_10_std": -0.05974378846400665, + "nauc_precision_at_1_diff1": 0.9564155667521437, + "nauc_precision_at_1_max": 1.0, + "nauc_precision_at_1_std": 0.9073959589089956, + "nauc_precision_at_20_diff1": -0.31218967700544853, + "nauc_precision_at_20_max": 0.015101101880288523, + "nauc_precision_at_20_std": -0.10187042490803115, + "nauc_precision_at_3_diff1": -0.2418495400445544, + "nauc_precision_at_3_max": -0.017073069963463458, + "nauc_precision_at_3_std": 0.024904433150907852, + "nauc_precision_at_5_diff1": -0.23061542055201162, + "nauc_precision_at_5_max": -0.013293477378375076, + "nauc_precision_at_5_std": -0.02217796284197703, + "nauc_recall_at_1000_diff1": -0.1758800893625114, + "nauc_recall_at_1000_max": 0.025501344233019, + "nauc_recall_at_1000_std": 0.042539963927428814, + "nauc_recall_at_100_diff1": -0.2486522818740174, + "nauc_recall_at_100_max": 0.019163648439019607, + "nauc_recall_at_100_std": 0.08018065043566762, + "nauc_recall_at_10_diff1": -0.1643890021643603, + "nauc_recall_at_10_max": 0.015612163534741345, + "nauc_recall_at_10_std": 0.12932048693738263, + "nauc_recall_at_1_diff1": -0.05431107143472811, + "nauc_recall_at_1_max": 0.016884860597154504, + "nauc_recall_at_1_std": 0.11466554606849116, + "nauc_recall_at_20_diff1": -0.19900512249205043, + "nauc_recall_at_20_max": 0.013811903907386002, + "nauc_recall_at_20_std": 0.11661397478238389, + "nauc_recall_at_3_diff1": -0.11946531860608983, + "nauc_recall_at_3_max": 0.009964580844462759, + "nauc_recall_at_3_std": 0.13655833592145109, + "nauc_recall_at_5_diff1": -0.1255934399523269, + 
"nauc_recall_at_5_max": 0.012044411079830297, + "nauc_recall_at_5_std": 0.1305906448336473, + "ndcg_at_1": 0.99858, + "ndcg_at_10": 0.82209, + "ndcg_at_100": 0.74273, + "ndcg_at_1000": 0.56583, + "ndcg_at_20": 0.79415, + "ndcg_at_3": 0.89211, + "ndcg_at_5": 0.8591, + "precision_at_1": 0.99858, + "precision_at_10": 0.79054, + "precision_at_100": 0.72617, + "precision_at_1000": 0.49807, + "precision_at_20": 0.76664, + "precision_at_3": 0.86383, + "precision_at_5": 0.82752, + "recall_at_1": 0.00083, + "recall_at_10": 0.00647, + "recall_at_100": 0.05838, + "recall_at_1000": 0.3868, + "recall_at_20": 0.01248, + "recall_at_3": 0.00214, + "recall_at_5": 0.00341 + } + ] + }, + "task_name": "ROxfordI2IRetrieval" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisI2IRetrieval.json new file mode 100644 index 0000000000..0f309a07f3 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "bafc3a08fdffd72558021ce3a41250833d7e0e88", + "evaluation_time": 43.56014585494995, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.78348, + "cv_recall_at_10": 0.96558, + "cv_recall_at_100": 0.99812, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 0.98217, + "cv_recall_at_3": 0.89972, + "cv_recall_at_5": 0.93367, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.78348, + "map_at_1": 0.00183, + "map_at_10": 0.01259, + "map_at_100": 0.08554, + "map_at_1000": 0.22541, + "map_at_20": 0.02257, + "map_at_3": 0.0046, + "map_at_5": 0.00704, + "mrr_at_1": 0.9870150187734669, + "mrr_at_10": 0.9935075093867334, + "mrr_at_100": 0.9935075093867334, + "mrr_at_1000": 0.9935075093867334, + "mrr_at_20": 
0.9935075093867334, + "mrr_at_3": 0.9935075093867334, + "mrr_at_5": 0.9935075093867334, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": 0.12564420871970045, + "nauc_cv_recall_at_100_max": 0.3655746256621164, + "nauc_cv_recall_at_100_std": -0.2306365991736563, + "nauc_cv_recall_at_10_diff1": -0.5058662936618029, + "nauc_cv_recall_at_10_max": 0.09207458773253613, + "nauc_cv_recall_at_10_std": 0.4663743642051124, + "nauc_cv_recall_at_1_diff1": -0.4013157463788695, + "nauc_cv_recall_at_1_max": 0.013502025515305102, + "nauc_cv_recall_at_1_std": 0.32242775071432206, + "nauc_cv_recall_at_20_diff1": -0.5198855681306671, + "nauc_cv_recall_at_20_max": 0.10151852705481003, + "nauc_cv_recall_at_20_std": 0.41412564884036973, + "nauc_cv_recall_at_3_diff1": -0.5198700551458323, + "nauc_cv_recall_at_3_max": 0.025885833103837443, + "nauc_cv_recall_at_3_std": 0.4187563971498901, + "nauc_cv_recall_at_5_diff1": -0.5209006482350635, + "nauc_cv_recall_at_5_max": 0.051797269624278094, + "nauc_cv_recall_at_5_std": 0.4360707529546872, + "nauc_map_at_1000_diff1": -0.32518052685711313, + "nauc_map_at_1000_max": 0.0038005812075390727, + "nauc_map_at_1000_std": 0.43958508353436493, + "nauc_map_at_100_diff1": -0.324729620306574, + "nauc_map_at_100_max": 0.003712984860206863, + "nauc_map_at_100_std": 0.3530965583894132, + "nauc_map_at_10_diff1": -0.2865369016119553, + "nauc_map_at_10_max": 0.012519773293503903, + "nauc_map_at_10_std": 0.23475872496725028, + "nauc_map_at_1_diff1": -0.0855732613663, + "nauc_map_at_1_max": 0.023221460373481606, + "nauc_map_at_1_std": 0.03199412568851411, + "nauc_map_at_20_diff1": -0.3089685409009752, + "nauc_map_at_20_max": 0.008164069309369689, + "nauc_map_at_20_std": 0.28138880870717525, + "nauc_map_at_3_diff1": -0.21596259885289204, + "nauc_map_at_3_max": 0.026694429615608712, + "nauc_map_at_3_std": 0.13815328440691188, + "nauc_map_at_5_diff1": -0.2539278390954876, 
+ "nauc_map_at_5_max": 0.01952941733527699, + "nauc_map_at_5_std": 0.18318190468395143, + "nauc_mrr_at_1000_diff1": 1.0, + "nauc_mrr_at_1000_max": -0.13028347793646336, + "nauc_mrr_at_1000_std": -0.28303491792776114, + "nauc_mrr_at_100_diff1": 1.0, + "nauc_mrr_at_100_max": -0.13028347793646336, + "nauc_mrr_at_100_std": -0.28303491792776114, + "nauc_mrr_at_10_diff1": 1.0, + "nauc_mrr_at_10_max": -0.13028347793646336, + "nauc_mrr_at_10_std": -0.28303491792776114, + "nauc_mrr_at_1_diff1": 1.0, + "nauc_mrr_at_1_max": -0.1302834779364721, + "nauc_mrr_at_1_std": -0.2830349179277583, + "nauc_mrr_at_20_diff1": 1.0, + "nauc_mrr_at_20_max": -0.13028347793646336, + "nauc_mrr_at_20_std": -0.28303491792776114, + "nauc_mrr_at_3_diff1": 1.0, + "nauc_mrr_at_3_max": -0.13028347793646336, + "nauc_mrr_at_3_std": -0.28303491792776114, + "nauc_mrr_at_5_diff1": 1.0, + "nauc_mrr_at_5_max": -0.13028347793646336, + "nauc_mrr_at_5_std": -0.28303491792776114, + "nauc_ndcg_at_1000_diff1": -0.3461037602745701, + "nauc_ndcg_at_1000_max": 0.01170706578078914, + "nauc_ndcg_at_1000_std": 0.40425946623874776, + "nauc_ndcg_at_100_diff1": -0.39901516979641344, + "nauc_ndcg_at_100_max": -0.00685883548003498, + "nauc_ndcg_at_100_std": 0.465350992380673, + "nauc_ndcg_at_10_diff1": -0.3955198632793018, + "nauc_ndcg_at_10_max": -0.0037363142445774643, + "nauc_ndcg_at_10_std": 0.38110734984115535, + "nauc_ndcg_at_1_diff1": 1.0, + "nauc_ndcg_at_1_max": -0.14486457971944697, + "nauc_ndcg_at_1_std": -0.2830349179277583, + "nauc_ndcg_at_20_diff1": -0.41278062763010503, + "nauc_ndcg_at_20_max": -0.004138702255930161, + "nauc_ndcg_at_20_std": 0.4191652248206659, + "nauc_ndcg_at_3_diff1": -0.34367743707797543, + "nauc_ndcg_at_3_max": 0.004103711367444123, + "nauc_ndcg_at_3_std": 0.3101278349486688, + "nauc_ndcg_at_5_diff1": -0.37388771810166976, + "nauc_ndcg_at_5_max": -0.004694760292318884, + "nauc_ndcg_at_5_std": 0.3429402757237829, + "nauc_precision_at_1000_diff1": -0.1804983224177905, + 
"nauc_precision_at_1000_max": -0.01148320196422333, + "nauc_precision_at_1000_std": 0.2504460959613102, + "nauc_precision_at_100_diff1": -0.3911848294276243, + "nauc_precision_at_100_max": -0.007414470176849031, + "nauc_precision_at_100_std": 0.4673613835957893, + "nauc_precision_at_10_diff1": -0.4052976521224889, + "nauc_precision_at_10_max": -0.00433906156493395, + "nauc_precision_at_10_std": 0.39212256495307285, + "nauc_precision_at_1_diff1": 1.0, + "nauc_precision_at_1_max": -0.14486457971944697, + "nauc_precision_at_1_std": -0.2830349179277583, + "nauc_precision_at_20_diff1": -0.4173913678934324, + "nauc_precision_at_20_max": -0.004543026048413097, + "nauc_precision_at_20_std": 0.4299497661264597, + "nauc_precision_at_3_diff1": -0.37619357586668295, + "nauc_precision_at_3_max": 0.006637408448532989, + "nauc_precision_at_3_std": 0.3255087096886432, + "nauc_precision_at_5_diff1": -0.393543490911112, + "nauc_precision_at_5_max": -0.0054501833188476565, + "nauc_precision_at_5_std": 0.3553468241787662, + "nauc_recall_at_1000_diff1": -0.28865460441970253, + "nauc_recall_at_1000_max": 0.02031269144932309, + "nauc_recall_at_1000_std": 0.3581111189760722, + "nauc_recall_at_100_diff1": -0.31737603693043515, + "nauc_recall_at_100_max": 0.010815413934469631, + "nauc_recall_at_100_std": 0.32072251393381573, + "nauc_recall_at_10_diff1": -0.2835809857944858, + "nauc_recall_at_10_max": 0.01631628873177338, + "nauc_recall_at_10_std": 0.2203863362341707, + "nauc_recall_at_1_diff1": -0.0855732613663, + "nauc_recall_at_1_max": 0.023221460373481606, + "nauc_recall_at_1_std": 0.03199412568851411, + "nauc_recall_at_20_diff1": -0.30410786586422606, + "nauc_recall_at_20_max": 0.012058890403941508, + "nauc_recall_at_20_std": 0.2609223499236706, + "nauc_recall_at_3_diff1": -0.22686364377869106, + "nauc_recall_at_3_max": 0.02838523233952593, + "nauc_recall_at_3_std": 0.13768932686394217, + "nauc_recall_at_5_diff1": -0.2568046605350692, + "nauc_recall_at_5_max": 0.02092009985156803, + 
"nauc_recall_at_5_std": 0.17610456507060399, + "ndcg_at_1": 0.98702, + "ndcg_at_10": 0.76647, + "ndcg_at_100": 0.59394, + "ndcg_at_1000": 0.44945, + "ndcg_at_20": 0.71423, + "ndcg_at_3": 0.8678, + "ndcg_at_5": 0.82287, + "precision_at_1": 0.98702, + "precision_at_10": 0.72226, + "precision_at_100": 0.5574, + "precision_at_1000": 0.25148, + "precision_at_20": 0.67042, + "precision_at_3": 0.83521, + "precision_at_5": 0.7832, + "recall_at_1": 0.00183, + "recall_at_10": 0.01349, + "recall_at_100": 0.10451, + "recall_at_1000": 0.40829, + "recall_at_20": 0.02511, + "recall_at_3": 0.00467, + "recall_at_5": 0.0073 + } + ] + }, + "task_name": "RParisI2IRetrieval" +} \ No newline at end of file From fe0d05354fc2bb0603f5cf1e2af2e3fa8e40a0f0 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Wed, 2 Oct 2024 15:45:11 +0100 Subject: [PATCH 40/73] add Sketchy i2i retrieval --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 1 + .../eng/SketchyI2IRetrieval.py | 40 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 72505e8a3c..cb1b2dc8a1 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -25,6 +25,7 @@ from .eng.RParisI2IRetrieval import * from .eng.SciMMIRI2TRetrieval import * from .eng.SciMMIRT2IRetrieval import * +from .eng.SketchyI2IRetrieval import * from .eng.SOPI2IRetrieval import * from .eng.StanfordCarsI2IRetrieval import * from .eng.TUBerlinT2IRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py new file mode 100644 index 0000000000..c076c76e04 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from 
mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class SketchyI2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="SketchyI2IRetrieval", + description="Retrieve photos from sketches.", + reference="https://arxiv.org/abs/2202.01747", # NOTE(review): the bibtex entry below is titled "The met dataset: Instance-level recognition for artworks", which does not match a sketch-to-photo task; reference, citation and date look carried over from another task file - confirm the intended source + dataset={ + "path": "JamieSJS/sketchy", + "revision": "c8b8c1b7a2f0a92f1bfaaa1c9afc22aa42c61d5b", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="cv_recall_at_1", + date=("2021-12-06", "2021-12-14"), # conference dates + domains=["Encyclopaedic"], + task_subtypes=["Object recognition"], + license="Not specified", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@inproceedings{ypsilantis2021met, + title={The met dataset: Instance-level recognition for artworks}, + author={Ypsilantis, Nikolaos-Antonios and Garcia, Noa and Han, Guangxing and Ibrahimi, Sarah and Van Noord, Nanne and Tolias, Giorgos}, + booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, + year={2021} +} + """, + descriptive_stats={ + # "n_samples": {"default": 397121}, + }, + ) + skip_first_result = False From 72ace6e742676bfceb6c26f2e13bd416098e2906 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 14 Oct 2024 10:15:04 +0100 Subject: [PATCH 41/73] add task metadata --- .../Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py | 11 ++++++++++- .../Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py | 12 +++++++++++- .../Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py | 12 +++++++++++- .../Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py | 11 ++++++++++- .../Any2AnyRetrieval/eng/SketchyI2IRetrieval.py | 12 +++++++++++- 5 files changed, 53 insertions(+), 5 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py
b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py index 8ed178e605..39502a46d8 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py @@ -35,7 +35,16 @@ class ROxfordI2IRetrieval(AbsTaskAny2AnyRetrieval): } """, descriptive_stats={ - "n_samples": {"default": 3555537}, + "n_samples": {"test": 3555537}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 5063, + "num_queries": 5063, + "average_relevant_docs_per_query": 702, + } + }, }, ) skip_first_result = True diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py index 1335e11659..2bc04f94b7 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py @@ -34,7 +34,17 @@ class RP2kI2IRetrieval(AbsTaskAny2AnyRetrieval): } """, descriptive_stats={ - "n_samples": {"default": 4409419}, + "n_samples": {"test": 39457}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 39457, + "num_queries": 39457, + "average_relevant_docs_per_query": 111.8, + } + }, }, + ) skip_first_result = True diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py index df31b527c8..6b9a2040ca 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py @@ -35,7 +35,17 @@ class RParisI2IRetrieval(AbsTaskAny2AnyRetrieval): } """, descriptive_stats={ - "n_samples": {"default": 3555537}, + "n_samples": {"test": 6392}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 6392, + "num_queries": 6392, + "average_relevant_docs_per_query": 734, + 
} + }, }, + ) skip_first_result = True diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py index 9f3d771b0a..09b33aac7d 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py @@ -35,7 +35,16 @@ class SOPI2IRetrieval(AbsTaskAny2AnyRetrieval): } """, descriptive_stats={ - "n_samples": {"default": 120053}, + "n_samples": {"test": 120053}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 120053, + "num_queries": 120053, + "average_relevant_docs_per_query": 7, + } + }, }, ) skip_first_result = True diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py index c076c76e04..1d60a5d870 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py @@ -34,7 +34,17 @@ class SketchyI2IRetrieval(AbsTaskAny2AnyRetrieval): } """, descriptive_stats={ - # "n_samples": {"default": 397121}, + "n_samples": {"test": 452886}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 7.24, + "num_documents": 25000, + "num_queries": 452886, + "average_relevant_docs_per_query": 3623.0, + } + }, }, + ) skip_first_result = False From 5afb2aa27fe7f3cab0cd4b5b0dd5bc7adf66f6e9 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 14 Oct 2024 11:37:25 +0100 Subject: [PATCH 42/73] add BLINKIT2IRetrieval dataset --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 1 + .../eng/BLINKIT2IRetrieval.py | 49 +++++ .../Any2AnyRetrieval/eng/RP2kI2IRetrieval.py | 1 - .../eng/RParisI2IRetrieval.py | 1 - .../eng/SketchyI2IRetrieval.py | 1 - .../BLINKIT2IRetrieval.json | 186 ++++++++++++++++++ 6 files changed, 236 insertions(+), 3 deletions(-) create mode 100644 
mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index cb1b2dc8a1..42bb40ee5d 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -1,5 +1,6 @@ from __future__ import annotations +from .eng.BLINKIT2IRetrieval import * from .eng.CIRRIT2IRetrieval import * from .eng.CUB200I2IRetrieval import * from .eng.Fashion200kI2TRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py new file mode 100644 index 0000000000..50d011ab5c --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class BLINKIT2IRetrieval(AbsTaskAny2AnyRetrieval): # declarative task definition: this class only sets `metadata` + metadata = TaskMetadata( + name="BLINKIT2IRetrieval", + description="Retrieve images based on images and specific retrieval instructions.", + reference="https://arxiv.org/abs/2404.12390", + dataset={ + "path": "JamieSJS/blink-it2i", + "revision": "359b66f11c25d19bc8f7108d98e660a5857f3d26", + "trust_remote_code": True, # NOTE(review): presumably forwarded to the dataset loader, where it permits executing the dataset repo's loading script - confirm it is actually required + }, + type="Retrieval", + category="it2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2018-01-01", "2018-12-31"), # NOTE(review): this range predates the 2024 arXiv reference (2404.12390) given above; looks carried over from another task - confirm + domains=["Encyclopaedic"], + task_subtypes=["Image Text Retrieval"], + license="CC BY-SA 4.0", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="found", + bibtex_citation="""@article{fu2024blink, + title={Blink: Multimodal large language models can see but not perceive}, + author={Fu, Xingyu and Hu, Yushi and Li,
Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, + journal={arXiv preprint arXiv:2404.12390}, + year={2024} +} +""", + descriptive_stats={ + "n_samples": {"test": 402}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 804, + "num_queries": 402, + "average_relevant_docs_per_query": 2, + } + }, + }, + ) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py index 2bc04f94b7..321bb818be 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py @@ -45,6 +45,5 @@ class RP2kI2IRetrieval(AbsTaskAny2AnyRetrieval): } }, }, - ) skip_first_result = True diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py index 6b9a2040ca..a112ded273 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py @@ -46,6 +46,5 @@ class RParisI2IRetrieval(AbsTaskAny2AnyRetrieval): } }, }, - ) skip_first_result = True diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py index 1d60a5d870..c89091f41d 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py @@ -45,6 +45,5 @@ class SketchyI2IRetrieval(AbsTaskAny2AnyRetrieval): } }, }, - ) skip_first_result = False diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IRetrieval.json new file mode 100644 index 0000000000..1032c117ac --- /dev/null +++ 
b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "359b66f11c25d19bc8f7108d98e660a5857f3d26", + "evaluation_time": 17.393863439559937, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.31592, + "cv_recall_at_10": 0.56965, + "cv_recall_at_100": 0.87065, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 0.6393, + "cv_recall_at_3": 0.48507, + "cv_recall_at_5": 0.52736, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.44916, + "map_at_1": 0.31592, + "map_at_10": 0.40997, + "map_at_100": 0.42021, + "map_at_1000": 0.42102, + "map_at_20": 0.41454, + "map_at_3": 0.39469, + "map_at_5": 0.40427, + "mrr_at_1": 0.31592039800995025, + "mrr_at_10": 0.40997295269683326, + "mrr_at_100": 0.4202098745202131, + "mrr_at_1000": 0.42101657433711814, + "mrr_at_20": 0.41453745832471395, + "mrr_at_3": 0.3946932006633499, + "mrr_at_5": 0.4042703150912106, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": 0.17659898624319145, + "nauc_cv_recall_at_100_max": 0.2661970196010441, + "nauc_cv_recall_at_100_std": -0.2790205688665586, + "nauc_cv_recall_at_10_diff1": 0.3814466970679155, + "nauc_cv_recall_at_10_max": 0.45289099967438606, + "nauc_cv_recall_at_10_std": -0.6696974920638539, + "nauc_cv_recall_at_1_diff1": 0.4348231455825518, + "nauc_cv_recall_at_1_max": 0.3562517088470133, + "nauc_cv_recall_at_1_std": -0.4716841092376067, + "nauc_cv_recall_at_20_diff1": 0.34765332655935005, + "nauc_cv_recall_at_20_max": 0.40507962086487265, + "nauc_cv_recall_at_20_std": -0.60412556449698, + "nauc_cv_recall_at_3_diff1": 0.4000259511772347, + "nauc_cv_recall_at_3_max": 0.46517944743370765, + "nauc_cv_recall_at_3_std": -0.6578600193275457, + "nauc_cv_recall_at_5_diff1": 0.4149222268518541, + "nauc_cv_recall_at_5_max": 
0.4447326055964038, + "nauc_cv_recall_at_5_std": -0.6819130948957665, + "nauc_map_at_1000_diff1": 0.41844540783755707, + "nauc_map_at_1000_max": 0.40386353368016625, + "nauc_map_at_1000_std": -0.5528994661016456, + "nauc_map_at_100_diff1": 0.41795524243083654, + "nauc_map_at_100_max": 0.40346749605366333, + "nauc_map_at_100_std": -0.5524674902038105, + "nauc_map_at_10_diff1": 0.42120255585772387, + "nauc_map_at_10_max": 0.40787732867834814, + "nauc_map_at_10_std": -0.5613450956416107, + "nauc_map_at_1_diff1": 0.4348231455825518, + "nauc_map_at_1_max": 0.3562517088470133, + "nauc_map_at_1_std": -0.4716841092376067, + "nauc_map_at_20_diff1": 0.4188754182377973, + "nauc_map_at_20_max": 0.4046917634724885, + "nauc_map_at_20_std": -0.5563336557097699, + "nauc_map_at_3_diff1": 0.4214894251666346, + "nauc_map_at_3_max": 0.41188287008473307, + "nauc_map_at_3_std": -0.5590768607537941, + "nauc_map_at_5_diff1": 0.4246473310855139, + "nauc_map_at_5_max": 0.4070463857404686, + "nauc_map_at_5_std": -0.5636557349011302, + "nauc_mrr_at_1000_diff1": 0.41844540783755707, + "nauc_mrr_at_1000_max": 0.40386353368016625, + "nauc_mrr_at_1000_std": -0.5528994661016456, + "nauc_mrr_at_100_diff1": 0.41795524243083654, + "nauc_mrr_at_100_max": 0.40346749605366333, + "nauc_mrr_at_100_std": -0.5524674902038105, + "nauc_mrr_at_10_diff1": 0.42120255585772387, + "nauc_mrr_at_10_max": 0.40787732867834814, + "nauc_mrr_at_10_std": -0.5613450956416107, + "nauc_mrr_at_1_diff1": 0.4348231455825518, + "nauc_mrr_at_1_max": 0.3562517088470133, + "nauc_mrr_at_1_std": -0.4716841092376067, + "nauc_mrr_at_20_diff1": 0.4188754182377973, + "nauc_mrr_at_20_max": 0.4046917634724885, + "nauc_mrr_at_20_std": -0.5563336557097699, + "nauc_mrr_at_3_diff1": 0.4214894251666346, + "nauc_mrr_at_3_max": 0.41188287008473307, + "nauc_mrr_at_3_std": -0.5590768607537941, + "nauc_mrr_at_5_diff1": 0.4246473310855139, + "nauc_mrr_at_5_max": 0.4070463857404686, + "nauc_mrr_at_5_std": -0.5636557349011302, + 
"nauc_ndcg_at_1000_diff1": 0.410108945334173, + "nauc_ndcg_at_1000_max": 0.4059872490668924, + "nauc_ndcg_at_1000_std": -0.5567117452158448, + "nauc_ndcg_at_100_diff1": 0.3974622734711721, + "nauc_ndcg_at_100_max": 0.39776281190453455, + "nauc_ndcg_at_100_std": -0.5422948577574471, + "nauc_ndcg_at_10_diff1": 0.41250730473823416, + "nauc_ndcg_at_10_max": 0.4193748171136443, + "nauc_ndcg_at_10_std": -0.5882737032592944, + "nauc_ndcg_at_1_diff1": 0.4348231455825518, + "nauc_ndcg_at_1_max": 0.3562517088470133, + "nauc_ndcg_at_1_std": -0.4716841092376067, + "nauc_ndcg_at_20_diff1": 0.4050302095189553, + "nauc_ndcg_at_20_max": 0.40785434428016737, + "nauc_ndcg_at_20_std": -0.5711108827183825, + "nauc_ndcg_at_3_diff1": 0.41605079061886857, + "nauc_ndcg_at_3_max": 0.4259171131235778, + "nauc_ndcg_at_3_std": -0.5847193268248356, + "nauc_ndcg_at_5_diff1": 0.42200838628819426, + "nauc_ndcg_at_5_max": 0.41721735277614064, + "nauc_ndcg_at_5_std": -0.5934002799239759, + "nauc_precision_at_1000_diff1": NaN, + "nauc_precision_at_1000_max": NaN, + "nauc_precision_at_1000_std": NaN, + "nauc_precision_at_100_diff1": 0.17659898624319106, + "nauc_precision_at_100_max": 0.26619701960104514, + "nauc_precision_at_100_std": -0.279020568866558, + "nauc_precision_at_10_diff1": 0.38144669706791556, + "nauc_precision_at_10_max": 0.4528909996743864, + "nauc_precision_at_10_std": -0.6696974920638536, + "nauc_precision_at_1_diff1": 0.4348231455825518, + "nauc_precision_at_1_max": 0.3562517088470133, + "nauc_precision_at_1_std": -0.4716841092376067, + "nauc_precision_at_20_diff1": 0.34765332655935016, + "nauc_precision_at_20_max": 0.40507962086487265, + "nauc_precision_at_20_std": -0.60412556449698, + "nauc_precision_at_3_diff1": 0.4000259511772348, + "nauc_precision_at_3_max": 0.4651794474337073, + "nauc_precision_at_3_std": -0.6578600193275455, + "nauc_precision_at_5_diff1": 0.41492222685185387, + "nauc_precision_at_5_max": 0.444732605596404, + "nauc_precision_at_5_std": -0.6819130948957663, + 
"nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": 0.17659898624319145, + "nauc_recall_at_100_max": 0.2661970196010441, + "nauc_recall_at_100_std": -0.2790205688665586, + "nauc_recall_at_10_diff1": 0.3814466970679155, + "nauc_recall_at_10_max": 0.45289099967438606, + "nauc_recall_at_10_std": -0.6696974920638539, + "nauc_recall_at_1_diff1": 0.4348231455825518, + "nauc_recall_at_1_max": 0.3562517088470133, + "nauc_recall_at_1_std": -0.4716841092376067, + "nauc_recall_at_20_diff1": 0.34765332655935005, + "nauc_recall_at_20_max": 0.40507962086487265, + "nauc_recall_at_20_std": -0.60412556449698, + "nauc_recall_at_3_diff1": 0.4000259511772347, + "nauc_recall_at_3_max": 0.46517944743370765, + "nauc_recall_at_3_std": -0.6578600193275457, + "nauc_recall_at_5_diff1": 0.4149222268518541, + "nauc_recall_at_5_max": 0.4447326055964038, + "nauc_recall_at_5_std": -0.6819130948957665, + "ndcg_at_1": 0.31592, + "ndcg_at_10": 0.44916, + "ndcg_at_100": 0.50879, + "ndcg_at_1000": 0.52627, + "ndcg_at_20": 0.46643, + "ndcg_at_3": 0.41809, + "ndcg_at_5": 0.43543, + "precision_at_1": 0.31592, + "precision_at_10": 0.05697, + "precision_at_100": 0.00871, + "precision_at_1000": 0.001, + "precision_at_20": 0.03197, + "precision_at_3": 0.16169, + "precision_at_5": 0.10547, + "recall_at_1": 0.31592, + "recall_at_10": 0.56965, + "recall_at_100": 0.87065, + "recall_at_1000": 1.0, + "recall_at_20": 0.6393, + "recall_at_3": 0.48507, + "recall_at_5": 0.52736 + } + ] + }, + "task_name": "BLINKIT2IRetrieval" +} \ No newline at end of file From 4ba56b105c1266014a1d785f8937b97d124c6cd4 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 14 Oct 2024 12:07:16 +0100 Subject: [PATCH 43/73] add BLINKIT2TRetrieval --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 1 + .../eng/BLINKIT2IRetrieval.py | 2 +- .../eng/BLINKIT2TRetrieval.py | 49 +++++ .../BLINKIT2TRetrieval.json | 186 ++++++++++++++++++ 4 files changed, 237 
insertions(+), 1 deletion(-) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 42bb40ee5d..6fa1ab2320 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations from .eng.BLINKIT2IRetrieval import * +from .eng.BLINKIT2TRetrieval import * from .eng.CIRRIT2IRetrieval import * from .eng.CUB200I2IRetrieval import * from .eng.Fashion200kI2TRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py index 50d011ab5c..ca265fd898 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py @@ -42,7 +42,7 @@ class BLINKIT2IRetrieval(AbsTaskAny2AnyRetrieval): "average_query_length": 0.0, "num_documents": 804, "num_queries": 402, - "average_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 1, } }, }, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py new file mode 100644 index 0000000000..2c652c6388 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class BLINKIT2TRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="BLINKIT2TRetrieval", + description="Retrieve images based on images and specific retrieval instructions.", + reference="https://arxiv.org/abs/2404.12390", + dataset={ + "path": 
"JamieSJS/blink-it2t", + "revision": "4ab83c87ac5b24e3b730f86d585671493a3a423c", + "trust_remote_code": True, + }, + type="Retrieval", + category="it2t", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2018-01-01", "2018-12-31"), + domains=["Encyclopaedic"], + task_subtypes=["Image Text Retrieval"], + license="CC BY-SA 4.0", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="found", + bibtex_citation="""@article{fu2024blink, + title={Blink: Multimodal large language models can see but not perceive}, + author={Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, + journal={arXiv preprint arXiv:2404.12390}, + year={2024} +} +""", + descriptive_stats={ + "n_samples": {"test": 1073}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 3080, + "num_queries": 1073, + "average_relevant_docs_per_query": 1, + } + }, + }, + ) diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TRetrieval.json new file mode 100644 index 0000000000..d701facb1e --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "4ab83c87ac5b24e3b730f86d585671493a3a423c", + "evaluation_time": 13.768992900848389, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.00093, + "cv_recall_at_10": 0.00373, + "cv_recall_at_100": 0.06151, + "cv_recall_at_1000": 0.49301, + "cv_recall_at_20": 0.01025, + "cv_recall_at_3": 0.00093, + "cv_recall_at_5": 0.00093, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + 
"main_score": 0.00209, + "map_at_1": 0.00093, + "map_at_10": 0.00139, + "map_at_100": 0.00274, + "map_at_1000": 0.00397, + "map_at_20": 0.00176, + "map_at_3": 0.00093, + "map_at_5": 0.00093, + "mrr_at_1": 0.0009319664492078285, + "mrr_at_10": 0.0013147383837039009, + "mrr_at_100": 0.002685356046665323, + "mrr_at_1000": 0.003948307638249348, + "mrr_at_20": 0.0016977247814228705, + "mrr_at_3": 0.0009319664492078285, + "mrr_at_5": 0.0009319664492078285, + "nauc_cv_recall_at_1000_diff1": 0.21618180421784144, + "nauc_cv_recall_at_1000_max": 0.1966391139887544, + "nauc_cv_recall_at_1000_std": 0.6300616078101204, + "nauc_cv_recall_at_100_diff1": 0.17443594463110845, + "nauc_cv_recall_at_100_max": 0.3594175633623019, + "nauc_cv_recall_at_100_std": 0.5893797181186331, + "nauc_cv_recall_at_10_diff1": -0.05990075938028686, + "nauc_cv_recall_at_10_max": 0.43603863739514936, + "nauc_cv_recall_at_10_std": 0.6462545358013111, + "nauc_cv_recall_at_1_diff1": -0.49952309145315554, + "nauc_cv_recall_at_1_max": 0.6610449563695936, + "nauc_cv_recall_at_1_std": 1.0, + "nauc_cv_recall_at_20_diff1": 0.1175713728405099, + "nauc_cv_recall_at_20_max": 0.36443381136528874, + "nauc_cv_recall_at_20_std": 0.3549079762661633, + "nauc_cv_recall_at_3_diff1": -0.49952309145315554, + "nauc_cv_recall_at_3_max": 0.6610449563695936, + "nauc_cv_recall_at_3_std": 1.0, + "nauc_cv_recall_at_5_diff1": -0.49952309145315554, + "nauc_cv_recall_at_5_max": 0.6610449563695936, + "nauc_cv_recall_at_5_std": 1.0, + "nauc_map_at_1000_diff1": 0.3461462162176906, + "nauc_map_at_1000_max": 0.30890589543493097, + "nauc_map_at_1000_std": 0.707200194236762, + "nauc_map_at_100_diff1": 0.4005511014650457, + "nauc_map_at_100_max": 0.30811121696833377, + "nauc_map_at_100_std": 0.7144966661138995, + "nauc_map_at_10_diff1": 0.5208347947487291, + "nauc_map_at_10_max": 0.206028460069979, + "nauc_map_at_10_std": 0.8872521734282164, + "nauc_map_at_1_diff1": 0.6610449563695936, + "nauc_map_at_1_max": 0.09839818901557038, + 
"nauc_map_at_1_std": 1.0, + "nauc_map_at_20_diff1": 0.4542718670585475, + "nauc_map_at_20_max": 0.26815935842012756, + "nauc_map_at_20_std": 0.7972838331802248, + "nauc_map_at_3_diff1": 0.6610449563695936, + "nauc_map_at_3_max": 0.09839818901557038, + "nauc_map_at_3_std": 1.0, + "nauc_map_at_5_diff1": 0.6610449563695936, + "nauc_map_at_5_max": 0.09839818901557038, + "nauc_map_at_5_std": 1.0, + "nauc_mrr_at_1000_diff1": -0.013155580619472381, + "nauc_mrr_at_1000_max": 0.4252494452276833, + "nauc_mrr_at_1000_std": 0.687428882142861, + "nauc_mrr_at_100_diff1": -0.05688061033793547, + "nauc_mrr_at_100_max": 0.4528834485316897, + "nauc_mrr_at_100_std": 0.6942429482097452, + "nauc_mrr_at_10_diff1": -0.3361389575479778, + "nauc_mrr_at_10_max": 0.5699031562786795, + "nauc_mrr_at_10_std": 0.8567106980461006, + "nauc_mrr_at_1_diff1": -0.49952309145315554, + "nauc_mrr_at_1_max": 0.6610449563695936, + "nauc_mrr_at_1_std": 1.0, + "nauc_mrr_at_20_diff1": -0.2128688275475949, + "nauc_mrr_at_20_max": 0.5173348447205537, + "nauc_mrr_at_20_std": 0.6977606046374574, + "nauc_mrr_at_3_diff1": -0.49952309145315554, + "nauc_mrr_at_3_max": 0.6610449563695936, + "nauc_mrr_at_3_std": 1.0, + "nauc_mrr_at_5_diff1": -0.49952309145315554, + "nauc_mrr_at_5_max": 0.6610449563695936, + "nauc_mrr_at_5_std": 1.0, + "nauc_ndcg_at_1000_diff1": 0.2267671022987118, + "nauc_ndcg_at_1000_max": 0.2471557214473064, + "nauc_ndcg_at_1000_std": 0.6348682667422092, + "nauc_ndcg_at_100_diff1": 0.2912265250539601, + "nauc_ndcg_at_100_max": 0.3559110476318641, + "nauc_ndcg_at_100_std": 0.5901042407967306, + "nauc_ndcg_at_10_diff1": 0.42096720717204206, + "nauc_ndcg_at_10_max": 0.2838485324864617, + "nauc_ndcg_at_10_std": 0.7948899807288888, + "nauc_ndcg_at_1_diff1": 0.6610449563695936, + "nauc_ndcg_at_1_max": 0.09839818901557038, + "nauc_ndcg_at_1_std": 1.0, + "nauc_ndcg_at_20_diff1": 0.3330912830350423, + "nauc_ndcg_at_20_max": 0.36298197701132934, + "nauc_ndcg_at_20_std": 0.6588880474018856, + 
"nauc_ndcg_at_3_diff1": 0.6610449563695936, + "nauc_ndcg_at_3_max": 0.09839818901557038, + "nauc_ndcg_at_3_std": 1.0, + "nauc_ndcg_at_5_diff1": 0.6610449563695936, + "nauc_ndcg_at_5_max": 0.09839818901557038, + "nauc_ndcg_at_5_std": 1.0, + "nauc_precision_at_1000_diff1": 0.21618180421784106, + "nauc_precision_at_1000_max": 0.19663911398875364, + "nauc_precision_at_1000_std": 0.6300616078101199, + "nauc_precision_at_100_diff1": 0.2633008669431452, + "nauc_precision_at_100_max": 0.35813105058108047, + "nauc_precision_at_100_std": 0.5625569512563295, + "nauc_precision_at_10_diff1": 0.31227200390917714, + "nauc_precision_at_10_max": 0.3688258547365643, + "nauc_precision_at_10_std": 0.6910567239150112, + "nauc_precision_at_1_diff1": 0.6610449563695936, + "nauc_precision_at_1_max": 0.09839818901557038, + "nauc_precision_at_1_std": 1.0, + "nauc_precision_at_20_diff1": 0.24987842391547985, + "nauc_precision_at_20_max": 0.4262053224877857, + "nauc_precision_at_20_std": 0.5596936671568136, + "nauc_precision_at_3_diff1": 0.6610449563695937, + "nauc_precision_at_3_max": 0.09839818901557038, + "nauc_precision_at_3_std": 1.0, + "nauc_precision_at_5_diff1": 0.6610449563695937, + "nauc_precision_at_5_max": 0.09839818901557036, + "nauc_precision_at_5_std": 1.0, + "nauc_recall_at_1000_diff1": 0.21618180421784144, + "nauc_recall_at_1000_max": 0.1966391139887544, + "nauc_recall_at_1000_std": 0.6300616078101204, + "nauc_recall_at_100_diff1": 0.2633008669431453, + "nauc_recall_at_100_max": 0.3581310505810804, + "nauc_recall_at_100_std": 0.5625569512563297, + "nauc_recall_at_10_diff1": 0.3122720039091771, + "nauc_recall_at_10_max": 0.3688258547365644, + "nauc_recall_at_10_std": 0.691056723915011, + "nauc_recall_at_1_diff1": 0.6610449563695936, + "nauc_recall_at_1_max": 0.09839818901557038, + "nauc_recall_at_1_std": 1.0, + "nauc_recall_at_20_diff1": 0.24987842391547993, + "nauc_recall_at_20_max": 0.4262053224877856, + "nauc_recall_at_20_std": 0.5596936671568133, + 
"nauc_recall_at_3_diff1": 0.6610449563695936, + "nauc_recall_at_3_max": 0.09839818901557038, + "nauc_recall_at_3_std": 1.0, + "nauc_recall_at_5_diff1": 0.6610449563695936, + "nauc_recall_at_5_max": 0.09839818901557038, + "nauc_recall_at_5_std": 1.0, + "ndcg_at_1": 0.00093, + "ndcg_at_10": 0.00209, + "ndcg_at_100": 0.0128, + "ndcg_at_1000": 0.06231, + "ndcg_at_20": 0.00349, + "ndcg_at_3": 0.00093, + "ndcg_at_5": 0.00093, + "precision_at_1": 0.00093, + "precision_at_10": 0.00047, + "precision_at_100": 0.00065, + "precision_at_1000": 0.00049, + "precision_at_20": 0.00051, + "precision_at_3": 0.00031, + "precision_at_5": 0.00019, + "recall_at_1": 0.00093, + "recall_at_10": 0.00466, + "recall_at_100": 0.06524, + "recall_at_1000": 0.49301, + "recall_at_20": 0.01025, + "recall_at_3": 0.00093, + "recall_at_5": 0.00093 + } + ] + }, + "task_name": "BLINKIT2TRetrieval" +} \ No newline at end of file From dd99ddccb1f6fd18fe5eb9566da8e4a9cb3df0f2 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 14 Oct 2024 22:07:31 +0100 Subject: [PATCH 44/73] add ImageCoDeT2IRetrieval --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 1 + .../eng/ImageCoDeT2IRetrieval.py | 50 +++++ .../ImageCoDeT2IRetrieval.json | 186 ++++++++++++++++++ 3 files changed, 237 insertions(+) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageCoDeT2IRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 6fa1ab2320..1e05ea1566 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -12,6 +12,7 @@ from .eng.FORBI2IRetrieval import * from .eng.HatefulMemesI2TRetrieval import * from .eng.HatefulMemesT2IRetrieval import * +from .eng.ImageCoDeT2IRetrieval import * from .eng.InfoSeekIT2ITRetrieval import * from .eng.InfoSeekIT2TRetrieval import * 
from .eng.MemotionI2TRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py new file mode 100644 index 0000000000..62541db290 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ImageCoDeT2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="ImageCoDeT2IRetrieval", + description="Retrieve a specific video frame based on a precise caption.", + reference="https://aclanthology.org/2022.acl-long.241.pdf", + dataset={ + "path": "JamieSJS/imagecode", + "revision": "a424cd523ffb157b69a875fb5e71c1d51be54089", + }, + type="Retrieval", + category="t2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2022-05-22", "2022-05-27"), # conference dates + form=["written"], + domains=["Web"], + task_subtypes=["Image Text Retrieval"], + license="CC BY-SA 4.0", + socioeconomic_status="medium", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="found", + bibtex_citation="""@article{krojer2022image, + title={Image retrieval from contextual descriptions}, + author={Krojer, Benno and Adlakha, Vaibhav and Vineet, Vibhav and Goyal, Yash and Ponti, Edoardo and Reddy, Siva}, + journal={arXiv preprint arXiv:2203.15867}, + year={2022} +} +""", + descriptive_stats={ + "n_samples": {"test": 2302}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 23020, + "num_queries": 2302, + "average_relevant_docs_per_query": 1.0, + } + }, + }, + ) \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageCoDeT2IRetrieval.json 
b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageCoDeT2IRetrieval.json new file mode 100644 index 0000000000..13200b7196 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageCoDeT2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "a424cd523ffb157b69a875fb5e71c1d51be54089", + "evaluation_time": 82.47818303108215, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.0404, + "cv_recall_at_10": 0.19939, + "cv_recall_at_100": 0.4748, + "cv_recall_at_1000": 0.67811, + "cv_recall_at_20": 0.29018, + "cv_recall_at_3": 0.08905, + "cv_recall_at_5": 0.12858, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.1073, + "map_at_1": 0.0404, + "map_at_10": 0.07947, + "map_at_100": 0.09013, + "map_at_1000": 0.09115, + "map_at_20": 0.0858, + "map_at_3": 0.06118, + "map_at_5": 0.07015, + "mrr_at_1": 0.04039965247610773, + "mrr_at_10": 0.07948695406340925, + "mrr_at_100": 0.09015226912980688, + "mrr_at_1000": 0.09117557368465125, + "mrr_at_20": 0.08582291845210348, + "mrr_at_3": 0.06117868520127424, + "mrr_at_5": 0.07017086591369824, + "nauc_cv_recall_at_1000_diff1": 0.10207641080088928, + "nauc_cv_recall_at_1000_max": 0.47551950475797095, + "nauc_cv_recall_at_1000_std": 0.667878501495164, + "nauc_cv_recall_at_100_diff1": 0.10625659946342164, + "nauc_cv_recall_at_100_max": 0.429394897766257, + "nauc_cv_recall_at_100_std": 0.6428953991022838, + "nauc_cv_recall_at_10_diff1": 0.1561354794756864, + "nauc_cv_recall_at_10_max": 0.3230948830435383, + "nauc_cv_recall_at_10_std": 0.4849191311442782, + "nauc_cv_recall_at_1_diff1": 0.31664206223566443, + "nauc_cv_recall_at_1_max": 0.17966444353568511, + "nauc_cv_recall_at_1_std": 0.39566281510890866, + "nauc_cv_recall_at_20_diff1": 0.12129977218912101, + "nauc_cv_recall_at_20_max": 0.38830333447601323, + "nauc_cv_recall_at_20_std": 0.5541829899702385, + 
"nauc_cv_recall_at_3_diff1": 0.19875202658849309, + "nauc_cv_recall_at_3_max": 0.28302032898711493, + "nauc_cv_recall_at_3_std": 0.4420405932660536, + "nauc_cv_recall_at_5_diff1": 0.1777398492807893, + "nauc_cv_recall_at_5_max": 0.28188387839125345, + "nauc_cv_recall_at_5_std": 0.4596852170744158, + "nauc_map_at_1000_diff1": 0.21083155153769165, + "nauc_map_at_1000_max": 0.2761233937186182, + "nauc_map_at_1000_std": 0.4596747781481127, + "nauc_map_at_100_diff1": 0.21045007881515884, + "nauc_map_at_100_max": 0.2758409491731245, + "nauc_map_at_100_std": 0.45980619206042345, + "nauc_map_at_10_diff1": 0.2198233112203957, + "nauc_map_at_10_max": 0.25983030622113884, + "nauc_map_at_10_std": 0.4388620519482371, + "nauc_map_at_1_diff1": 0.31664206223566443, + "nauc_map_at_1_max": 0.17966444353568511, + "nauc_map_at_1_std": 0.39566281510890866, + "nauc_map_at_20_diff1": 0.21187076846238379, + "nauc_map_at_20_max": 0.2726996242735469, + "nauc_map_at_20_std": 0.4532884201759696, + "nauc_map_at_3_diff1": 0.2424283784615785, + "nauc_map_at_3_max": 0.2405242743804034, + "nauc_map_at_3_std": 0.4204628045064827, + "nauc_map_at_5_diff1": 0.22933322343545606, + "nauc_map_at_5_max": 0.24325667796160028, + "nauc_map_at_5_std": 0.4279703891935171, + "nauc_mrr_at_1000_diff1": 0.2111114841525623, + "nauc_mrr_at_1000_max": 0.27633286230256027, + "nauc_mrr_at_1000_std": 0.4599361510805012, + "nauc_mrr_at_100_diff1": 0.2107295407122133, + "nauc_mrr_at_100_max": 0.27604998501829475, + "nauc_mrr_at_100_std": 0.46006698826843867, + "nauc_mrr_at_10_diff1": 0.22007340887962168, + "nauc_mrr_at_10_max": 0.25997366670711813, + "nauc_mrr_at_10_std": 0.4390697248762385, + "nauc_mrr_at_1_diff1": 0.31664206223566443, + "nauc_mrr_at_1_max": 0.17966444353568511, + "nauc_mrr_at_1_std": 0.39566281510890866, + "nauc_mrr_at_20_diff1": 0.21215153596461336, + "nauc_mrr_at_20_max": 0.2729107668453846, + "nauc_mrr_at_20_std": 0.45355197906522926, + "nauc_mrr_at_3_diff1": 0.2424283784615785, + 
"nauc_mrr_at_3_max": 0.2405242743804034, + "nauc_mrr_at_3_std": 0.4204628045064827, + "nauc_mrr_at_5_diff1": 0.22960397137313956, + "nauc_mrr_at_5_max": 0.24341652796565305, + "nauc_mrr_at_5_std": 0.42819917818577374, + "nauc_ndcg_at_1000_diff1": 0.17173880474374906, + "nauc_ndcg_at_1000_max": 0.33895563139815, + "nauc_ndcg_at_1000_std": 0.5246453040907454, + "nauc_ndcg_at_100_diff1": 0.1683122269839326, + "nauc_ndcg_at_100_max": 0.33447412438250224, + "nauc_ndcg_at_100_std": 0.5261247976469097, + "nauc_ndcg_at_10_diff1": 0.1941087589715255, + "nauc_ndcg_at_10_max": 0.2852009961647385, + "nauc_ndcg_at_10_std": 0.4571235834817824, + "nauc_ndcg_at_1_diff1": 0.31664206223566443, + "nauc_ndcg_at_1_max": 0.17966444353568511, + "nauc_ndcg_at_1_std": 0.39566281510890866, + "nauc_ndcg_at_20_diff1": 0.17466391431078573, + "nauc_ndcg_at_20_max": 0.31877860873395164, + "nauc_ndcg_at_20_std": 0.49309984249311956, + "nauc_ndcg_at_3_diff1": 0.22782771989085718, + "nauc_ndcg_at_3_max": 0.25457117569804816, + "nauc_ndcg_at_3_std": 0.4275096555899013, + "nauc_ndcg_at_5_diff1": 0.21040054798962565, + "nauc_ndcg_at_5_max": 0.25741771300479144, + "nauc_ndcg_at_5_std": 0.4391783586085749, + "nauc_precision_at_1000_diff1": 0.10207641080088917, + "nauc_precision_at_1000_max": 0.47551950475797033, + "nauc_precision_at_1000_std": 0.6678785014951635, + "nauc_precision_at_100_diff1": 0.10625659946342192, + "nauc_precision_at_100_max": 0.4293948977662575, + "nauc_precision_at_100_std": 0.6428953991022842, + "nauc_precision_at_10_diff1": 0.15613547947568646, + "nauc_precision_at_10_max": 0.32309488304353845, + "nauc_precision_at_10_std": 0.4849191311442785, + "nauc_precision_at_1_diff1": 0.31664206223566443, + "nauc_precision_at_1_max": 0.17966444353568511, + "nauc_precision_at_1_std": 0.39566281510890866, + "nauc_precision_at_20_diff1": 0.12129977218912126, + "nauc_precision_at_20_max": 0.3883033344760135, + "nauc_precision_at_20_std": 0.5541829899702387, + "nauc_precision_at_3_diff1": 
0.19875202658849306, + "nauc_precision_at_3_max": 0.283020328987115, + "nauc_precision_at_3_std": 0.44204059326605394, + "nauc_precision_at_5_diff1": 0.17773984928078942, + "nauc_precision_at_5_max": 0.2818838783912535, + "nauc_precision_at_5_std": 0.45968521707441584, + "nauc_recall_at_1000_diff1": 0.10207641080088928, + "nauc_recall_at_1000_max": 0.47551950475797095, + "nauc_recall_at_1000_std": 0.667878501495164, + "nauc_recall_at_100_diff1": 0.10625659946342164, + "nauc_recall_at_100_max": 0.429394897766257, + "nauc_recall_at_100_std": 0.6428953991022838, + "nauc_recall_at_10_diff1": 0.1561354794756864, + "nauc_recall_at_10_max": 0.3230948830435383, + "nauc_recall_at_10_std": 0.4849191311442782, + "nauc_recall_at_1_diff1": 0.31664206223566443, + "nauc_recall_at_1_max": 0.17966444353568511, + "nauc_recall_at_1_std": 0.39566281510890866, + "nauc_recall_at_20_diff1": 0.12129977218912101, + "nauc_recall_at_20_max": 0.38830333447601323, + "nauc_recall_at_20_std": 0.5541829899702385, + "nauc_recall_at_3_diff1": 0.19875202658849309, + "nauc_recall_at_3_max": 0.28302032898711493, + "nauc_recall_at_3_std": 0.4420405932660536, + "nauc_recall_at_5_diff1": 0.1777398492807893, + "nauc_recall_at_5_max": 0.28188387839125345, + "nauc_recall_at_5_std": 0.4596852170744158, + "ndcg_at_1": 0.0404, + "ndcg_at_10": 0.1073, + "ndcg_at_100": 0.16373, + "ndcg_at_1000": 0.19005, + "ndcg_at_20": 0.13029, + "ndcg_at_3": 0.06831, + "ndcg_at_5": 0.08453, + "precision_at_1": 0.0404, + "precision_at_10": 0.01994, + "precision_at_100": 0.00475, + "precision_at_1000": 0.00068, + "precision_at_20": 0.01451, + "precision_at_3": 0.02968, + "precision_at_5": 0.02572, + "recall_at_1": 0.0404, + "recall_at_10": 0.19939, + "recall_at_100": 0.4748, + "recall_at_1000": 0.67811, + "recall_at_20": 0.29018, + "recall_at_3": 0.08905, + "recall_at_5": 0.12858 + } + ] + }, + "task_name": "ImageCoDeT2IRetrieval" +} \ No newline at end of file From 411e313b43d548906c1a5fa464ad5b2ce3f3b139 Mon Sep 17 00:00:00 
2001 From: Jamie-Stirling Date: Mon, 14 Oct 2024 22:08:30 +0100 Subject: [PATCH 45/73] make lint --- .../tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py index 62541db290..3fae916f6b 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py @@ -18,7 +18,7 @@ class ImageCoDeT2IRetrieval(AbsTaskAny2AnyRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", - date=("2022-05-22", "2022-05-27"), # conference dates + date=("2022-05-22", "2022-05-27"), # conference dates form=["written"], domains=["Web"], task_subtypes=["Image Text Retrieval"], @@ -47,4 +47,4 @@ class ImageCoDeT2IRetrieval(AbsTaskAny2AnyRetrieval): } }, }, - ) \ No newline at end of file + ) From 74a2400ef9887d956f17746505e5638e1a324a88 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Tue, 15 Oct 2024 14:28:32 +0100 Subject: [PATCH 46/73] add vizwiz retrieval and results --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 2 + .../Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py | 50 +++++ .../eng/VizWizIT2TRetrieval.py | 51 +++++ .../VizWizIT2TRetrieval.json | 186 ++++++++++++++++++ 4 files changed, 289 insertions(+) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VizWizIT2TRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 1e05ea1566..98d1afcd2d 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -34,6 +34,8 @@ from .eng.TUBerlinT2IRetrieval 
import * from .eng.VisualNewsI2TRetrieval import * from .eng.VisualNewsT2IRetrieval import * +from .eng.VizWizIT2TRetrieval import * +from .eng.VQA2IT2TRetrieval import * from .eng.WebQAT2ITRetrieval import * from .eng.WebQAT2TRetrieval import * from .multilingual.WITT2IRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py new file mode 100644 index 0000000000..0bfcee1c06 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class VQA2IT2TRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="VQA2IT2TRetrieval", + description="Retrieve the correct answer for a question about an image.", + reference="https://openaccess.thecvf.com/content_cvpr_2017/html/Goyal_Making_the_v_CVPR_2017_paper.html", + dataset={ + "path": "JamieSJS/vqa-2", + "revision": "addc919d80589a767212ca455657f4e6c55e71f8", + "trust_remote_code": True, + }, + type="Retrieval", + category="it2t", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2017-07-01", "2017-07-01"), + domains=["Web"], + task_subtypes=["Image Text Retrieval"], + license="CC BY-SA 4.0", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="found", + bibtex_citation="""@InProceedings{Goyal_2017_CVPR, +author = {Goyal, Yash and Khot, Tejas and Summers-Stay, Douglas and Batra, Dhruv and Parikh, Devi}, +title = {Making the v in VQA Matter: Elevating the Role of Image Understanding in Visual Question Answering}, +booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {July}, +year = {2017} +} +""", + descriptive_stats={ + "n_samples": {"test": 214354}, + 
"avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 2143540, + "num_queries": 214354, + "average_relevant_docs_per_query": 10, + } + }, + }, + ) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py new file mode 100644 index 0000000000..2860a244cc --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class VizWizIT2TRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="VizWizIT2TRetrieval", + description="Retrieve the correct answer for a question about an image.", + reference="https://openaccess.thecvf.com/content_cvpr_2018/papers/Gurari_VizWiz_Grand_Challenge_CVPR_2018_paper.pdf", + dataset={ + "path": "JamieSJS/vizwiz", + "revision": "723c6ffed034eb9dcbb44777e1a9ddf97c5ee28a", + "trust_remote_code": True, + }, + type="Retrieval", + category="it2t", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2018-01-01", "2018-01-01"), + domains=["Web"], + task_subtypes=["Image Text Retrieval"], + license="CC BY-SA 4.0", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="found", + bibtex_citation="""@inproceedings{gurari2018vizwiz, + title={Vizwiz grand challenge: Answering visual questions from blind people}, + author={Gurari, Danna and Li, Qing and Stangl, Abigale J and Guo, Anhong and Lin, Chi and Grauman, Kristen and Luo, Jiebo and Bigham, Jeffrey P}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={3608--3617}, + year={2018} +} + +""", + descriptive_stats={ + "n_samples": {"test": 214354}, + "avg_character_length": { + "test": { + 
"average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 2143540, + "num_queries": 214354, + "average_relevant_docs_per_query": 10, + } + }, + }, + ) diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VizWizIT2TRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VizWizIT2TRetrieval.json new file mode 100644 index 0000000000..26983542b4 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VizWizIT2TRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "723c6ffed034eb9dcbb44777e1a9ddf97c5ee28a", + "evaluation_time": 32.021708726882935, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.0294, + "cv_recall_at_10": 0.07317, + "cv_recall_at_100": 0.19819, + "cv_recall_at_1000": 0.3737, + "cv_recall_at_20": 0.09863, + "cv_recall_at_3": 0.04608, + "cv_recall_at_5": 0.05788, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.01511, + "map_at_1": 0.00301, + "map_at_10": 0.00749, + "map_at_100": 0.01081, + "map_at_1000": 0.01176, + "map_at_20": 0.0088, + "map_at_3": 0.00507, + "map_at_5": 0.00605, + "mrr_at_1": 0.029404954850659876, + "mrr_at_10": 0.041434029041114, + "mrr_at_100": 0.045365167700329743, + "mrr_at_1000": 0.04612327351299033, + "mrr_at_20": 0.043187763329325876, + "mrr_at_3": 0.036736898973527825, + "mrr_at_5": 0.03942270587327312, + "nauc_cv_recall_at_1000_diff1": 0.11813681061217512, + "nauc_cv_recall_at_1000_max": -0.11894128215838488, + "nauc_cv_recall_at_1000_std": 0.3782492530808885, + "nauc_cv_recall_at_100_diff1": 0.11427439308010998, + "nauc_cv_recall_at_100_max": -0.09531881188079029, + "nauc_cv_recall_at_100_std": 0.2069207353440963, + "nauc_cv_recall_at_10_diff1": 0.20419699826470403, + "nauc_cv_recall_at_10_max": -0.11315602683152005, + "nauc_cv_recall_at_10_std": 0.11079116202458081, + 
"nauc_cv_recall_at_1_diff1": 0.3780410690211136, + "nauc_cv_recall_at_1_max": -0.15996276050998642, + "nauc_cv_recall_at_1_std": 0.12084151368405678, + "nauc_cv_recall_at_20_diff1": 0.17482215533278464, + "nauc_cv_recall_at_20_max": -0.09614325077967463, + "nauc_cv_recall_at_20_std": 0.11622067910258244, + "nauc_cv_recall_at_3_diff1": 0.3021499883385322, + "nauc_cv_recall_at_3_max": -0.1409817803959759, + "nauc_cv_recall_at_3_std": 0.12190544061690316, + "nauc_cv_recall_at_5_diff1": 0.2422495719721318, + "nauc_cv_recall_at_5_max": -0.11604909641982115, + "nauc_cv_recall_at_5_std": 0.12525785371426437, + "nauc_map_at_1000_diff1": 0.16429877210946825, + "nauc_map_at_1000_max": -0.1295306253626044, + "nauc_map_at_1000_std": 0.2062946969445036, + "nauc_map_at_100_diff1": 0.1670600341719391, + "nauc_map_at_100_max": -0.12982814569101003, + "nauc_map_at_100_std": 0.18876332652966407, + "nauc_map_at_10_diff1": 0.19138314983144086, + "nauc_map_at_10_max": -0.12835016161145998, + "nauc_map_at_10_std": 0.15183799317664892, + "nauc_map_at_1_diff1": 0.3572077845896727, + "nauc_map_at_1_max": -0.1591774719332963, + "nauc_map_at_1_std": 0.1257863823021691, + "nauc_map_at_20_diff1": 0.17047854383467648, + "nauc_map_at_20_max": -0.12495405745802321, + "nauc_map_at_20_std": 0.16271914594752626, + "nauc_map_at_3_diff1": 0.25952802533398156, + "nauc_map_at_3_max": -0.1473725924476619, + "nauc_map_at_3_std": 0.13084611961877016, + "nauc_map_at_5_diff1": 0.2102820356246173, + "nauc_map_at_5_max": -0.13351667721655666, + "nauc_map_at_5_std": 0.14460103788615644, + "nauc_mrr_at_1000_diff1": 0.2882430552866343, + "nauc_mrr_at_1000_max": -0.13678972890948193, + "nauc_mrr_at_1000_std": 0.12898324685751192, + "nauc_mrr_at_100_diff1": 0.2891333801979083, + "nauc_mrr_at_100_max": -0.1371078360866261, + "nauc_mrr_at_100_std": 0.12577922767556043, + "nauc_mrr_at_10_diff1": 0.30678787633680227, + "nauc_mrr_at_10_max": -0.1417415793576384, + "nauc_mrr_at_10_std": 0.11840012286416857, + 
"nauc_mrr_at_1_diff1": 0.3780410690211136, + "nauc_mrr_at_1_max": -0.15996276050998642, + "nauc_mrr_at_1_std": 0.12084151368405678, + "nauc_mrr_at_20_diff1": 0.29802916652653827, + "nauc_mrr_at_20_max": -0.1380992977602512, + "nauc_mrr_at_20_std": 0.11913643771545089, + "nauc_mrr_at_3_diff1": 0.34218208940138206, + "nauc_mrr_at_3_max": -0.15259828173240922, + "nauc_mrr_at_3_std": 0.11980199564671674, + "nauc_mrr_at_5_diff1": 0.31906571381939547, + "nauc_mrr_at_5_max": -0.14353131092973306, + "nauc_mrr_at_5_std": 0.12118536859900152, + "nauc_ndcg_at_1000_diff1": 0.1382546228263889, + "nauc_ndcg_at_1000_max": -0.1186022001179963, + "nauc_ndcg_at_1000_std": 0.29134546582965304, + "nauc_ndcg_at_100_diff1": 0.1608084546142371, + "nauc_ndcg_at_100_max": -0.1208877559776067, + "nauc_ndcg_at_100_std": 0.20420457544308251, + "nauc_ndcg_at_10_diff1": 0.21347296273381702, + "nauc_ndcg_at_10_max": -0.12360631260617351, + "nauc_ndcg_at_10_std": 0.13835357292918368, + "nauc_ndcg_at_1_diff1": 0.35720778458967256, + "nauc_ndcg_at_1_max": -0.15917747193329645, + "nauc_ndcg_at_1_std": 0.12578638230216885, + "nauc_ndcg_at_20_diff1": 0.189575288463788, + "nauc_ndcg_at_20_max": -0.12047147115102609, + "nauc_ndcg_at_20_std": 0.14823959023714925, + "nauc_ndcg_at_3_diff1": 0.28072213819651337, + "nauc_ndcg_at_3_max": -0.1493321151811889, + "nauc_ndcg_at_3_std": 0.1313988281635346, + "nauc_ndcg_at_5_diff1": 0.23487221921337986, + "nauc_ndcg_at_5_max": -0.12854699731824779, + "nauc_ndcg_at_5_std": 0.13838213366410212, + "nauc_precision_at_1000_diff1": 0.09153318291899752, + "nauc_precision_at_1000_max": -0.10712625620860772, + "nauc_precision_at_1000_std": 0.3486808234128117, + "nauc_precision_at_100_diff1": 0.11829783638630895, + "nauc_precision_at_100_max": -0.11229273014278543, + "nauc_precision_at_100_std": 0.2457874398625524, + "nauc_precision_at_10_diff1": 0.1586640506586383, + "nauc_precision_at_10_max": -0.10658265337446211, + "nauc_precision_at_10_std": 0.1433984006213967, + 
"nauc_precision_at_1_diff1": 0.35720778458967256, + "nauc_precision_at_1_max": -0.15917747193329645, + "nauc_precision_at_1_std": 0.12578638230216885, + "nauc_precision_at_20_diff1": 0.13529505359357108, + "nauc_precision_at_20_max": -0.10702832193841087, + "nauc_precision_at_20_std": 0.16042770655667163, + "nauc_precision_at_3_diff1": 0.2468790893829598, + "nauc_precision_at_3_max": -0.1425487542554598, + "nauc_precision_at_3_std": 0.1347458330420782, + "nauc_precision_at_5_diff1": 0.17923534723711776, + "nauc_precision_at_5_max": -0.10942897722644009, + "nauc_precision_at_5_std": 0.1456878106837016, + "nauc_recall_at_1000_diff1": 0.09153318291899752, + "nauc_recall_at_1000_max": -0.1071262562086077, + "nauc_recall_at_1000_std": 0.34868082341281187, + "nauc_recall_at_100_diff1": 0.11829783638630909, + "nauc_recall_at_100_max": -0.11229273014278539, + "nauc_recall_at_100_std": 0.24578743986255244, + "nauc_recall_at_10_diff1": 0.1586640506586383, + "nauc_recall_at_10_max": -0.10658265337446211, + "nauc_recall_at_10_std": 0.1433984006213967, + "nauc_recall_at_1_diff1": 0.3572077845896727, + "nauc_recall_at_1_max": -0.1591774719332963, + "nauc_recall_at_1_std": 0.1257863823021691, + "nauc_recall_at_20_diff1": 0.13529505359357108, + "nauc_recall_at_20_max": -0.10702832193841087, + "nauc_recall_at_20_std": 0.16042770655667163, + "nauc_recall_at_3_diff1": 0.24687908938295963, + "nauc_recall_at_3_max": -0.14254875425545974, + "nauc_recall_at_3_std": 0.1347458330420781, + "nauc_recall_at_5_diff1": 0.17923534723711776, + "nauc_recall_at_5_max": -0.10942897722644009, + "nauc_recall_at_5_std": 0.1456878106837016, + "ndcg_at_1": 0.0301, + "ndcg_at_10": 0.01511, + "ndcg_at_100": 0.03019, + "ndcg_at_1000": 0.04943, + "ndcg_at_20": 0.01882, + "ndcg_at_3": 0.02242, + "ndcg_at_5": 0.01915, + "precision_at_1": 0.0301, + "precision_at_10": 0.01197, + "precision_at_100": 0.00477, + "precision_at_1000": 0.0012, + "precision_at_20": 0.00935, + "precision_at_3": 0.02022, + 
"precision_at_5": 0.01635, + "recall_at_1": 0.00301, + "recall_at_10": 0.01197, + "recall_at_100": 0.04767, + "recall_at_1000": 0.11975, + "recall_at_20": 0.01871, + "recall_at_3": 0.00607, + "recall_at_5": 0.00817 + } + ] + }, + "task_name": "VizWizIT2TRetrieval" +} \ No newline at end of file From 8067b1798311e24a2cbffaec81c6af7d09765483 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Tue, 15 Oct 2024 15:08:18 +0100 Subject: [PATCH 47/73] fix vizwiz duplicate texts --- mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py index 2860a244cc..f2e94328c3 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py @@ -11,7 +11,7 @@ class VizWizIT2TRetrieval(AbsTaskAny2AnyRetrieval): reference="https://openaccess.thecvf.com/content_cvpr_2018/papers/Gurari_VizWiz_Grand_Challenge_CVPR_2018_paper.pdf", dataset={ "path": "JamieSJS/vizwiz", - "revision": "723c6ffed034eb9dcbb44777e1a9ddf97c5ee28a", + "revision": "044af162d55f82ab603fa16ffcf7f1e4dbf300e9", "trust_remote_code": True, }, type="Retrieval", From a80f3bb6d944ddc47800dba3535b96ae56e958e2 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Tue, 15 Oct 2024 15:14:49 +0100 Subject: [PATCH 48/73] add new vizwiz results --- .../VizWizIT2TRetrieval.json | 342 +++++++++--------- 1 file changed, 171 insertions(+), 171 deletions(-) diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VizWizIT2TRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VizWizIT2TRetrieval.json index 26983542b4..03ec1474f1 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VizWizIT2TRetrieval.json +++ 
b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VizWizIT2TRetrieval.json @@ -1,184 +1,184 @@ { - "dataset_revision": "723c6ffed034eb9dcbb44777e1a9ddf97c5ee28a", - "evaluation_time": 32.021708726882935, + "dataset_revision": "044af162d55f82ab603fa16ffcf7f1e4dbf300e9", + "evaluation_time": 30.84842872619629, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "cv_recall_at_1": 0.0294, - "cv_recall_at_10": 0.07317, - "cv_recall_at_100": 0.19819, - "cv_recall_at_1000": 0.3737, - "cv_recall_at_20": 0.09863, - "cv_recall_at_3": 0.04608, - "cv_recall_at_5": 0.05788, + "cv_recall_at_1": 0.01343, + "cv_recall_at_10": 0.07131, + "cv_recall_at_100": 0.39569, + "cv_recall_at_1000": 0.91433, + "cv_recall_at_20": 0.12063, + "cv_recall_at_3": 0.0345, + "cv_recall_at_5": 0.04561, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.01511, - "map_at_1": 0.00301, - "map_at_10": 0.00749, - "map_at_100": 0.01081, - "map_at_1000": 0.01176, - "map_at_20": 0.0088, - "map_at_3": 0.00507, - "map_at_5": 0.00605, - "mrr_at_1": 0.029404954850659876, - "mrr_at_10": 0.041434029041114, - "mrr_at_100": 0.045365167700329743, - "mrr_at_1000": 0.04612327351299033, - "mrr_at_20": 0.043187763329325876, - "mrr_at_3": 0.036736898973527825, - "mrr_at_5": 0.03942270587327312, - "nauc_cv_recall_at_1000_diff1": 0.11813681061217512, - "nauc_cv_recall_at_1000_max": -0.11894128215838488, - "nauc_cv_recall_at_1000_std": 0.3782492530808885, - "nauc_cv_recall_at_100_diff1": 0.11427439308010998, - "nauc_cv_recall_at_100_max": -0.09531881188079029, - "nauc_cv_recall_at_100_std": 0.2069207353440963, - "nauc_cv_recall_at_10_diff1": 0.20419699826470403, - "nauc_cv_recall_at_10_max": -0.11315602683152005, - "nauc_cv_recall_at_10_std": 0.11079116202458081, - "nauc_cv_recall_at_1_diff1": 0.3780410690211136, - "nauc_cv_recall_at_1_max": -0.15996276050998642, - "nauc_cv_recall_at_1_std": 0.12084151368405678, - "nauc_cv_recall_at_20_diff1": 
0.17482215533278464, - "nauc_cv_recall_at_20_max": -0.09614325077967463, - "nauc_cv_recall_at_20_std": 0.11622067910258244, - "nauc_cv_recall_at_3_diff1": 0.3021499883385322, - "nauc_cv_recall_at_3_max": -0.1409817803959759, - "nauc_cv_recall_at_3_std": 0.12190544061690316, - "nauc_cv_recall_at_5_diff1": 0.2422495719721318, - "nauc_cv_recall_at_5_max": -0.11604909641982115, - "nauc_cv_recall_at_5_std": 0.12525785371426437, - "nauc_map_at_1000_diff1": 0.16429877210946825, - "nauc_map_at_1000_max": -0.1295306253626044, - "nauc_map_at_1000_std": 0.2062946969445036, - "nauc_map_at_100_diff1": 0.1670600341719391, - "nauc_map_at_100_max": -0.12982814569101003, - "nauc_map_at_100_std": 0.18876332652966407, - "nauc_map_at_10_diff1": 0.19138314983144086, - "nauc_map_at_10_max": -0.12835016161145998, - "nauc_map_at_10_std": 0.15183799317664892, - "nauc_map_at_1_diff1": 0.3572077845896727, - "nauc_map_at_1_max": -0.1591774719332963, - "nauc_map_at_1_std": 0.1257863823021691, - "nauc_map_at_20_diff1": 0.17047854383467648, - "nauc_map_at_20_max": -0.12495405745802321, - "nauc_map_at_20_std": 0.16271914594752626, - "nauc_map_at_3_diff1": 0.25952802533398156, - "nauc_map_at_3_max": -0.1473725924476619, - "nauc_map_at_3_std": 0.13084611961877016, - "nauc_map_at_5_diff1": 0.2102820356246173, - "nauc_map_at_5_max": -0.13351667721655666, - "nauc_map_at_5_std": 0.14460103788615644, - "nauc_mrr_at_1000_diff1": 0.2882430552866343, - "nauc_mrr_at_1000_max": -0.13678972890948193, - "nauc_mrr_at_1000_std": 0.12898324685751192, - "nauc_mrr_at_100_diff1": 0.2891333801979083, - "nauc_mrr_at_100_max": -0.1371078360866261, - "nauc_mrr_at_100_std": 0.12577922767556043, - "nauc_mrr_at_10_diff1": 0.30678787633680227, - "nauc_mrr_at_10_max": -0.1417415793576384, - "nauc_mrr_at_10_std": 0.11840012286416857, - "nauc_mrr_at_1_diff1": 0.3780410690211136, - "nauc_mrr_at_1_max": -0.15996276050998642, - "nauc_mrr_at_1_std": 0.12084151368405678, - "nauc_mrr_at_20_diff1": 0.29802916652653827, - 
"nauc_mrr_at_20_max": -0.1380992977602512, - "nauc_mrr_at_20_std": 0.11913643771545089, - "nauc_mrr_at_3_diff1": 0.34218208940138206, - "nauc_mrr_at_3_max": -0.15259828173240922, - "nauc_mrr_at_3_std": 0.11980199564671674, - "nauc_mrr_at_5_diff1": 0.31906571381939547, - "nauc_mrr_at_5_max": -0.14353131092973306, - "nauc_mrr_at_5_std": 0.12118536859900152, - "nauc_ndcg_at_1000_diff1": 0.1382546228263889, - "nauc_ndcg_at_1000_max": -0.1186022001179963, - "nauc_ndcg_at_1000_std": 0.29134546582965304, - "nauc_ndcg_at_100_diff1": 0.1608084546142371, - "nauc_ndcg_at_100_max": -0.1208877559776067, - "nauc_ndcg_at_100_std": 0.20420457544308251, - "nauc_ndcg_at_10_diff1": 0.21347296273381702, - "nauc_ndcg_at_10_max": -0.12360631260617351, - "nauc_ndcg_at_10_std": 0.13835357292918368, - "nauc_ndcg_at_1_diff1": 0.35720778458967256, - "nauc_ndcg_at_1_max": -0.15917747193329645, - "nauc_ndcg_at_1_std": 0.12578638230216885, - "nauc_ndcg_at_20_diff1": 0.189575288463788, - "nauc_ndcg_at_20_max": -0.12047147115102609, - "nauc_ndcg_at_20_std": 0.14823959023714925, - "nauc_ndcg_at_3_diff1": 0.28072213819651337, - "nauc_ndcg_at_3_max": -0.1493321151811889, - "nauc_ndcg_at_3_std": 0.1313988281635346, - "nauc_ndcg_at_5_diff1": 0.23487221921337986, - "nauc_ndcg_at_5_max": -0.12854699731824779, - "nauc_ndcg_at_5_std": 0.13838213366410212, - "nauc_precision_at_1000_diff1": 0.09153318291899752, - "nauc_precision_at_1000_max": -0.10712625620860772, - "nauc_precision_at_1000_std": 0.3486808234128117, - "nauc_precision_at_100_diff1": 0.11829783638630895, - "nauc_precision_at_100_max": -0.11229273014278543, - "nauc_precision_at_100_std": 0.2457874398625524, - "nauc_precision_at_10_diff1": 0.1586640506586383, - "nauc_precision_at_10_max": -0.10658265337446211, - "nauc_precision_at_10_std": 0.1433984006213967, - "nauc_precision_at_1_diff1": 0.35720778458967256, - "nauc_precision_at_1_max": -0.15917747193329645, - "nauc_precision_at_1_std": 0.12578638230216885, - "nauc_precision_at_20_diff1": 
0.13529505359357108, - "nauc_precision_at_20_max": -0.10702832193841087, - "nauc_precision_at_20_std": 0.16042770655667163, - "nauc_precision_at_3_diff1": 0.2468790893829598, - "nauc_precision_at_3_max": -0.1425487542554598, - "nauc_precision_at_3_std": 0.1347458330420782, - "nauc_precision_at_5_diff1": 0.17923534723711776, - "nauc_precision_at_5_max": -0.10942897722644009, - "nauc_precision_at_5_std": 0.1456878106837016, - "nauc_recall_at_1000_diff1": 0.09153318291899752, - "nauc_recall_at_1000_max": -0.1071262562086077, - "nauc_recall_at_1000_std": 0.34868082341281187, - "nauc_recall_at_100_diff1": 0.11829783638630909, - "nauc_recall_at_100_max": -0.11229273014278539, - "nauc_recall_at_100_std": 0.24578743986255244, - "nauc_recall_at_10_diff1": 0.1586640506586383, - "nauc_recall_at_10_max": -0.10658265337446211, - "nauc_recall_at_10_std": 0.1433984006213967, - "nauc_recall_at_1_diff1": 0.3572077845896727, - "nauc_recall_at_1_max": -0.1591774719332963, - "nauc_recall_at_1_std": 0.1257863823021691, - "nauc_recall_at_20_diff1": 0.13529505359357108, - "nauc_recall_at_20_max": -0.10702832193841087, - "nauc_recall_at_20_std": 0.16042770655667163, - "nauc_recall_at_3_diff1": 0.24687908938295963, - "nauc_recall_at_3_max": -0.14254875425545974, - "nauc_recall_at_3_std": 0.1347458330420781, - "nauc_recall_at_5_diff1": 0.17923534723711776, - "nauc_recall_at_5_max": -0.10942897722644009, - "nauc_recall_at_5_std": 0.1456878106837016, - "ndcg_at_1": 0.0301, - "ndcg_at_10": 0.01511, - "ndcg_at_100": 0.03019, - "ndcg_at_1000": 0.04943, - "ndcg_at_20": 0.01882, - "ndcg_at_3": 0.02242, - "ndcg_at_5": 0.01915, - "precision_at_1": 0.0301, - "precision_at_10": 0.01197, - "precision_at_100": 0.00477, - "precision_at_1000": 0.0012, - "precision_at_20": 0.00935, - "precision_at_3": 0.02022, - "precision_at_5": 0.01635, - "recall_at_1": 0.00301, - "recall_at_10": 0.01197, - "recall_at_100": 0.04767, - "recall_at_1000": 0.11975, - "recall_at_20": 0.01871, - "recall_at_3": 0.00607, - 
"recall_at_5": 0.00817 + "main_score": 0.03809, + "map_at_1": 0.01343, + "map_at_10": 0.02804, + "map_at_100": 0.03742, + "map_at_1000": 0.03951, + "map_at_20": 0.0312, + "map_at_3": 0.02227, + "map_at_5": 0.0247, + "mrr_at_1": 0.013429034498726558, + "mrr_at_10": 0.028036417160056885, + "mrr_at_100": 0.037420008046085314, + "mrr_at_1000": 0.039505486644354056, + "mrr_at_20": 0.031201242210305073, + "mrr_at_3": 0.022265956625762126, + "mrr_at_5": 0.024697074940186765, + "nauc_cv_recall_at_1000_diff1": 0.050583394811374374, + "nauc_cv_recall_at_1000_max": -0.18376667036878835, + "nauc_cv_recall_at_1000_std": 0.1298217845164567, + "nauc_cv_recall_at_100_diff1": 0.027698576038987024, + "nauc_cv_recall_at_100_max": -0.009117749373629837, + "nauc_cv_recall_at_100_std": 0.006829941177572575, + "nauc_cv_recall_at_10_diff1": 0.16070351829818008, + "nauc_cv_recall_at_10_max": -0.06294639384504126, + "nauc_cv_recall_at_10_std": -0.008037027217108009, + "nauc_cv_recall_at_1_diff1": 0.38564376924174965, + "nauc_cv_recall_at_1_max": -0.14158948678291924, + "nauc_cv_recall_at_1_std": -0.04672599956819156, + "nauc_cv_recall_at_20_diff1": 0.08493892106912637, + "nauc_cv_recall_at_20_max": -0.015163802751554916, + "nauc_cv_recall_at_20_std": 0.021963421659727856, + "nauc_cv_recall_at_3_diff1": 0.24266018554188226, + "nauc_cv_recall_at_3_max": -0.09291021576633819, + "nauc_cv_recall_at_3_std": -0.02148263688905047, + "nauc_cv_recall_at_5_diff1": 0.21381393860129796, + "nauc_cv_recall_at_5_max": -0.11435865792008686, + "nauc_cv_recall_at_5_std": -0.019942360767102027, + "nauc_map_at_1000_diff1": 0.20370724207547133, + "nauc_map_at_1000_max": -0.07702226966924787, + "nauc_map_at_1000_std": -0.014549893563493205, + "nauc_map_at_100_diff1": 0.20105115841633822, + "nauc_map_at_100_max": -0.0738589428173749, + "nauc_map_at_100_std": -0.015012934920928511, + "nauc_map_at_10_diff1": 0.2501026724956375, + "nauc_map_at_10_max": -0.09835226805301363, + "nauc_map_at_10_std": 
-0.024470727792863956, + "nauc_map_at_1_diff1": 0.38564376924174965, + "nauc_map_at_1_max": -0.14158948678291924, + "nauc_map_at_1_std": -0.04672599956819156, + "nauc_map_at_20_diff1": 0.2244173819284951, + "nauc_map_at_20_max": -0.08444395428873437, + "nauc_map_at_20_std": -0.016020286898738696, + "nauc_map_at_3_diff1": 0.2910544924422312, + "nauc_map_at_3_max": -0.10878289402033367, + "nauc_map_at_3_std": -0.03140611440353479, + "nauc_map_at_5_diff1": 0.2743756933858256, + "nauc_map_at_5_max": -0.11562364386231114, + "nauc_map_at_5_std": -0.02997097324144088, + "nauc_mrr_at_1000_diff1": 0.20370724207547133, + "nauc_mrr_at_1000_max": -0.07702226966924787, + "nauc_mrr_at_1000_std": -0.014549893563493205, + "nauc_mrr_at_100_diff1": 0.20105115841633822, + "nauc_mrr_at_100_max": -0.0738589428173749, + "nauc_mrr_at_100_std": -0.015012934920928511, + "nauc_mrr_at_10_diff1": 0.2501026724956375, + "nauc_mrr_at_10_max": -0.09835226805301363, + "nauc_mrr_at_10_std": -0.024470727792863956, + "nauc_mrr_at_1_diff1": 0.38564376924174965, + "nauc_mrr_at_1_max": -0.14158948678291924, + "nauc_mrr_at_1_std": -0.04672599956819156, + "nauc_mrr_at_20_diff1": 0.2244173819284951, + "nauc_mrr_at_20_max": -0.08444395428873437, + "nauc_mrr_at_20_std": -0.016020286898738696, + "nauc_mrr_at_3_diff1": 0.2910544924422312, + "nauc_mrr_at_3_max": -0.10878289402033367, + "nauc_mrr_at_3_std": -0.03140611440353479, + "nauc_mrr_at_5_diff1": 0.2743756933858256, + "nauc_mrr_at_5_max": -0.11562364386231114, + "nauc_mrr_at_5_std": -0.02997097324144088, + "nauc_ndcg_at_1000_diff1": 0.13406325540576058, + "nauc_ndcg_at_1000_max": -0.07106041428299001, + "nauc_ndcg_at_1000_std": 0.012519681774848902, + "nauc_ndcg_at_100_diff1": 0.10175722112103643, + "nauc_ndcg_at_100_max": -0.03450295253160438, + "nauc_ndcg_at_100_std": -0.0014070304908460896, + "nauc_ndcg_at_10_diff1": 0.21195276364011353, + "nauc_ndcg_at_10_max": -0.08376357345515494, + "nauc_ndcg_at_10_std": -0.017452778189587872, + 
"nauc_ndcg_at_1_diff1": 0.38564376924174965, + "nauc_ndcg_at_1_max": -0.14158948678291924, + "nauc_ndcg_at_1_std": -0.04672599956819156, + "nauc_ndcg_at_20_diff1": 0.15751370600120412, + "nauc_ndcg_at_20_max": -0.05224769346714892, + "nauc_ndcg_at_20_std": 0.0017388040866098441, + "nauc_ndcg_at_3_diff1": 0.27420783124692033, + "nauc_ndcg_at_3_max": -0.10323505279682847, + "nauc_ndcg_at_3_std": -0.028002766346549326, + "nauc_ndcg_at_5_diff1": 0.2515210601839793, + "nauc_ndcg_at_5_max": -0.11470427341623049, + "nauc_ndcg_at_5_std": -0.026194743288170313, + "nauc_precision_at_1000_diff1": 0.050583394811373666, + "nauc_precision_at_1000_max": -0.18376667036878533, + "nauc_precision_at_1000_std": 0.1298217845164564, + "nauc_precision_at_100_diff1": 0.027698576038987212, + "nauc_precision_at_100_max": -0.009117749373629547, + "nauc_precision_at_100_std": 0.006829941177572595, + "nauc_precision_at_10_diff1": 0.16070351829818025, + "nauc_precision_at_10_max": -0.06294639384504118, + "nauc_precision_at_10_std": -0.008037027217107877, + "nauc_precision_at_1_diff1": 0.38564376924174965, + "nauc_precision_at_1_max": -0.14158948678291924, + "nauc_precision_at_1_std": -0.04672599956819156, + "nauc_precision_at_20_diff1": 0.08493892106912652, + "nauc_precision_at_20_max": -0.015163802751554795, + "nauc_precision_at_20_std": 0.021963421659728005, + "nauc_precision_at_3_diff1": 0.24266018554188226, + "nauc_precision_at_3_max": -0.09291021576633826, + "nauc_precision_at_3_std": -0.02148263688905037, + "nauc_precision_at_5_diff1": 0.2138139386012981, + "nauc_precision_at_5_max": -0.11435865792008668, + "nauc_precision_at_5_std": -0.019942360767101795, + "nauc_recall_at_1000_diff1": 0.050583394811374374, + "nauc_recall_at_1000_max": -0.18376667036878835, + "nauc_recall_at_1000_std": 0.1298217845164567, + "nauc_recall_at_100_diff1": 0.027698576038987024, + "nauc_recall_at_100_max": -0.009117749373629837, + "nauc_recall_at_100_std": 0.006829941177572575, + "nauc_recall_at_10_diff1": 
0.16070351829818008, + "nauc_recall_at_10_max": -0.06294639384504126, + "nauc_recall_at_10_std": -0.008037027217108009, + "nauc_recall_at_1_diff1": 0.38564376924174965, + "nauc_recall_at_1_max": -0.14158948678291924, + "nauc_recall_at_1_std": -0.04672599956819156, + "nauc_recall_at_20_diff1": 0.08493892106912637, + "nauc_recall_at_20_max": -0.015163802751554916, + "nauc_recall_at_20_std": 0.021963421659727856, + "nauc_recall_at_3_diff1": 0.24266018554188226, + "nauc_recall_at_3_max": -0.09291021576633819, + "nauc_recall_at_3_std": -0.02148263688905047, + "nauc_recall_at_5_diff1": 0.21381393860129796, + "nauc_recall_at_5_max": -0.11435865792008686, + "nauc_recall_at_5_std": -0.019942360767102027, + "ndcg_at_1": 0.01343, + "ndcg_at_10": 0.03809, + "ndcg_at_100": 0.09961, + "ndcg_at_1000": 0.16333, + "ndcg_at_20": 0.05024, + "ndcg_at_3": 0.02539, + "ndcg_at_5": 0.02987, + "precision_at_1": 0.01343, + "precision_at_10": 0.00713, + "precision_at_100": 0.00396, + "precision_at_1000": 0.00091, + "precision_at_20": 0.00603, + "precision_at_3": 0.0115, + "precision_at_5": 0.00912, + "recall_at_1": 0.01343, + "recall_at_10": 0.07131, + "recall_at_100": 0.39569, + "recall_at_1000": 0.91433, + "recall_at_20": 0.12063, + "recall_at_3": 0.0345, + "recall_at_5": 0.04561 } ] }, From 4e1155c779be039b588b4279aa1611317180efd0 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Wed, 16 Oct 2024 09:01:06 +0100 Subject: [PATCH 49/73] add VQA2 results --- .../Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py | 10 +- .../eng/VizWizIT2TRetrieval.py | 2 +- .../VQA2IT2TRetrieval.json | 186 ++++++++++++++++++ 3 files changed, 192 insertions(+), 6 deletions(-) create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VQA2IT2TRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py index 0bfcee1c06..cb5c3b38e5 100644 --- 
a/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py @@ -11,7 +11,7 @@ class VQA2IT2TRetrieval(AbsTaskAny2AnyRetrieval): reference="https://openaccess.thecvf.com/content_cvpr_2017/html/Goyal_Making_the_v_CVPR_2017_paper.html", dataset={ "path": "JamieSJS/vqa-2", - "revision": "addc919d80589a767212ca455657f4e6c55e71f8", + "revision": "69882b6ba0b443dd62e633e546725b0f13b7e3aa", "trust_remote_code": True, }, type="Retrieval", @@ -36,14 +36,14 @@ class VQA2IT2TRetrieval(AbsTaskAny2AnyRetrieval): } """, descriptive_stats={ - "n_samples": {"test": 214354}, + "n_samples": {"test": 4319}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 2143540, - "num_queries": 214354, - "average_relevant_docs_per_query": 10, + "num_documents": 2091, + "num_queries": 4319, + "average_relevant_docs_per_query": 1, } }, }, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py index f2e94328c3..5565ca9f50 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py @@ -44,7 +44,7 @@ class VizWizIT2TRetrieval(AbsTaskAny2AnyRetrieval): "average_query_length": 0.0, "num_documents": 2143540, "num_queries": 214354, - "average_relevant_docs_per_query": 10, + "average_relevant_docs_per_query": 1, } }, }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VQA2IT2TRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VQA2IT2TRetrieval.json new file mode 100644 index 0000000000..63970704c1 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/VQA2IT2TRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "69882b6ba0b443dd62e633e546725b0f13b7e3aa", + 
"evaluation_time": 1096.5730390548706, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.00224, + "cv_recall_at_10": 0.01687, + "cv_recall_at_100": 0.07996, + "cv_recall_at_1000": 0.2859, + "cv_recall_at_20": 0.02905, + "cv_recall_at_3": 0.00618, + "cv_recall_at_5": 0.00962, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.0082, + "map_at_1": 0.00224, + "map_at_10": 0.00561, + "map_at_100": 0.0076, + "map_at_1000": 0.00818, + "map_at_20": 0.00644, + "map_at_3": 0.00389, + "map_at_5": 0.00467, + "mrr_at_1": 0.0022392864140627186, + "mrr_at_10": 0.005608912626509676, + "mrr_at_100": 0.007598006845600478, + "mrr_at_1000": 0.008184917799157795, + "mrr_at_20": 0.006437587783214147, + "mrr_at_3": 0.003891537674438876, + "mrr_at_5": 0.00466502452329641, + "nauc_cv_recall_at_1000_diff1": 0.008825837565798213, + "nauc_cv_recall_at_1000_max": 0.06850469648818859, + "nauc_cv_recall_at_1000_std": -0.0651814166780974, + "nauc_cv_recall_at_100_diff1": 0.045034516026494435, + "nauc_cv_recall_at_100_max": 0.004398660634268218, + "nauc_cv_recall_at_100_std": -0.03975561918800372, + "nauc_cv_recall_at_10_diff1": 0.06821726635054984, + "nauc_cv_recall_at_10_max": -0.01603124686173661, + "nauc_cv_recall_at_10_std": -0.08733387779771963, + "nauc_cv_recall_at_1_diff1": 0.17890721176822888, + "nauc_cv_recall_at_1_max": -0.08979014629444568, + "nauc_cv_recall_at_1_std": -0.07645555550866222, + "nauc_cv_recall_at_20_diff1": 0.06573644523017931, + "nauc_cv_recall_at_20_max": -0.008446727739883213, + "nauc_cv_recall_at_20_std": -0.10159027094805619, + "nauc_cv_recall_at_3_diff1": 0.10725987074701412, + "nauc_cv_recall_at_3_max": -0.04337993473508564, + "nauc_cv_recall_at_3_std": -0.06088188555814698, + "nauc_cv_recall_at_5_diff1": 0.08502413140032329, + "nauc_cv_recall_at_5_max": -0.018521868603006558, + "nauc_cv_recall_at_5_std": -0.07681198564756143, + "nauc_map_at_1000_diff1": 0.0857862921438581, + 
"nauc_map_at_1000_max": -0.022301252824256254, + "nauc_map_at_1000_std": -0.07002071019417072, + "nauc_map_at_100_diff1": 0.09059194638791825, + "nauc_map_at_100_max": -0.02730484396719757, + "nauc_map_at_100_std": -0.07112702467096156, + "nauc_map_at_10_diff1": 0.10561558040018261, + "nauc_map_at_10_max": -0.03887680003815409, + "nauc_map_at_10_std": -0.07384905648271843, + "nauc_map_at_1_diff1": 0.17890721176822888, + "nauc_map_at_1_max": -0.08979014629444568, + "nauc_map_at_1_std": -0.07645555550866222, + "nauc_map_at_20_diff1": 0.09999678477824628, + "nauc_map_at_20_max": -0.033642117064318564, + "nauc_map_at_20_std": -0.08006217604646698, + "nauc_map_at_3_diff1": 0.13264050928045265, + "nauc_map_at_3_max": -0.05878443289453297, + "nauc_map_at_3_std": -0.06106821971711554, + "nauc_map_at_5_diff1": 0.11792502863936245, + "nauc_map_at_5_max": -0.04439031923384928, + "nauc_map_at_5_std": -0.06823912896230401, + "nauc_mrr_at_1000_diff1": 0.08578620852479406, + "nauc_mrr_at_1000_max": -0.022301287770317674, + "nauc_mrr_at_1000_std": -0.07002051705337815, + "nauc_mrr_at_100_diff1": 0.09059184648214072, + "nauc_mrr_at_100_max": -0.02730487433721151, + "nauc_mrr_at_100_std": -0.07112681168567145, + "nauc_mrr_at_10_diff1": 0.10561558040018261, + "nauc_mrr_at_10_max": -0.03887680003815409, + "nauc_mrr_at_10_std": -0.07384905648271843, + "nauc_mrr_at_1_diff1": 0.17890721176822888, + "nauc_mrr_at_1_max": -0.08979014629444568, + "nauc_mrr_at_1_std": -0.07645555550866222, + "nauc_mrr_at_20_diff1": 0.09999678477824628, + "nauc_mrr_at_20_max": -0.033642117064318564, + "nauc_mrr_at_20_std": -0.08006217604646698, + "nauc_mrr_at_3_diff1": 0.13264050928045265, + "nauc_mrr_at_3_max": -0.05878443289453297, + "nauc_mrr_at_3_std": -0.06106821971711554, + "nauc_mrr_at_5_diff1": 0.11792502863936245, + "nauc_mrr_at_5_max": -0.04439031923384928, + "nauc_mrr_at_5_std": -0.06823912896230401, + "nauc_ndcg_at_1000_diff1": 0.03027392253878934, + "nauc_ndcg_at_1000_max": 0.03872756726005947, + 
"nauc_ndcg_at_1000_std": -0.06339954815558649, + "nauc_ndcg_at_100_diff1": 0.060190435328092255, + "nauc_ndcg_at_100_max": -0.006149217698640977, + "nauc_ndcg_at_100_std": -0.05423336919167363, + "nauc_ndcg_at_10_diff1": 0.08801115068128108, + "nauc_ndcg_at_10_max": -0.027958780559606267, + "nauc_ndcg_at_10_std": -0.0798528780495694, + "nauc_ndcg_at_1_diff1": 0.17890721176822888, + "nauc_ndcg_at_1_max": -0.08979014629444568, + "nauc_ndcg_at_1_std": -0.07645555550866222, + "nauc_ndcg_at_20_diff1": 0.08098580648408388, + "nauc_ndcg_at_20_max": -0.019828494542212433, + "nauc_ndcg_at_20_std": -0.09119555503912527, + "nauc_ndcg_at_3_diff1": 0.12374455660074443, + "nauc_ndcg_at_3_max": -0.05335042807216969, + "nauc_ndcg_at_3_std": -0.06082434994775198, + "nauc_ndcg_at_5_diff1": 0.10480672753283997, + "nauc_ndcg_at_5_max": -0.03413521896711701, + "nauc_ndcg_at_5_std": -0.07135709274825974, + "nauc_precision_at_1000_diff1": 0.00882583756579781, + "nauc_precision_at_1000_max": 0.06850469648818812, + "nauc_precision_at_1000_std": -0.06518141667809778, + "nauc_precision_at_100_diff1": 0.045034516026494456, + "nauc_precision_at_100_max": 0.004398660634268171, + "nauc_precision_at_100_std": -0.039755619188003716, + "nauc_precision_at_10_diff1": 0.06821726635055005, + "nauc_precision_at_10_max": -0.01603124686173649, + "nauc_precision_at_10_std": -0.08733387779771956, + "nauc_precision_at_1_diff1": 0.17890721176822888, + "nauc_precision_at_1_max": -0.08979014629444568, + "nauc_precision_at_1_std": -0.07645555550866222, + "nauc_precision_at_20_diff1": 0.06573644523017923, + "nauc_precision_at_20_max": -0.008446727739883295, + "nauc_precision_at_20_std": -0.10159027094805619, + "nauc_precision_at_3_diff1": 0.10725987074701417, + "nauc_precision_at_3_max": -0.04337993473508582, + "nauc_precision_at_3_std": -0.060881885558147046, + "nauc_precision_at_5_diff1": 0.0850241314003234, + "nauc_precision_at_5_max": -0.01852186860300637, + "nauc_precision_at_5_std": -0.0768119856475613, + 
"nauc_recall_at_1000_diff1": 0.008825837565798213, + "nauc_recall_at_1000_max": 0.06850469648818859, + "nauc_recall_at_1000_std": -0.0651814166780974, + "nauc_recall_at_100_diff1": 0.045034516026494435, + "nauc_recall_at_100_max": 0.004398660634268218, + "nauc_recall_at_100_std": -0.03975561918800372, + "nauc_recall_at_10_diff1": 0.06821726635054984, + "nauc_recall_at_10_max": -0.01603124686173661, + "nauc_recall_at_10_std": -0.08733387779771963, + "nauc_recall_at_1_diff1": 0.17890721176822888, + "nauc_recall_at_1_max": -0.08979014629444568, + "nauc_recall_at_1_std": -0.07645555550866222, + "nauc_recall_at_20_diff1": 0.06573644523017931, + "nauc_recall_at_20_max": -0.008446727739883213, + "nauc_recall_at_20_std": -0.10159027094805619, + "nauc_recall_at_3_diff1": 0.10725987074701412, + "nauc_recall_at_3_max": -0.04337993473508564, + "nauc_recall_at_3_std": -0.06088188555814698, + "nauc_recall_at_5_diff1": 0.08502413140032329, + "nauc_recall_at_5_max": -0.018521868603006558, + "nauc_recall_at_5_std": -0.07681198564756143, + "ndcg_at_1": 0.00224, + "ndcg_at_10": 0.0082, + "ndcg_at_100": 0.02041, + "ndcg_at_1000": 0.04417, + "ndcg_at_20": 0.01126, + "ndcg_at_3": 0.00448, + "ndcg_at_5": 0.00588, + "precision_at_1": 0.00224, + "precision_at_10": 0.00169, + "precision_at_100": 0.0008, + "precision_at_1000": 0.00029, + "precision_at_20": 0.00145, + "precision_at_3": 0.00206, + "precision_at_5": 0.00192, + "recall_at_1": 0.00224, + "recall_at_10": 0.01687, + "recall_at_100": 0.07996, + "recall_at_1000": 0.2859, + "recall_at_20": 0.02905, + "recall_at_3": 0.00618, + "recall_at_5": 0.00962 + } + ] + }, + "task_name": "VQA2IT2TRetrieval" +} \ No newline at end of file From e91faf6f75826097f896f5b8b8d81c69574ef915 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 17 Oct 2024 11:30:22 +0100 Subject: [PATCH 50/73] add GLD v2 I2T retrieval --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 1 + .../Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py | 50 +++++ 
.../GLDv2I2TRetrieval.json | 186 ++++++++++++++++++ 3 files changed, 237 insertions(+) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2TRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 98d1afcd2d..0c7a44ab52 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -10,6 +10,7 @@ from .eng.Flickr30kI2TRetrieval import * from .eng.Flickr30kT2IRetrieval import * from .eng.FORBI2IRetrieval import * +from .eng.GLDv2I2TRetrieval import * from .eng.HatefulMemesI2TRetrieval import * from .eng.HatefulMemesT2IRetrieval import * from .eng.ImageCoDeT2IRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py new file mode 100644 index 0000000000..9539ef31b3 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class GLDv2I2TRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="GLDv2I2TRetrieval", + description="Retrieve names of landmarks based on their image.", + reference="https://openaccess.thecvf.com/content_CVPR_2020/html/Weyand_Google_Landmarks_Dataset_v2_-_A_Large-Scale_Benchmark_for_Instance-Level_CVPR_2020_paper.html", + dataset={ + "path": "JamieSJS/gld-v2-i2t", + "revision": "d8c3e53160860f76de73ed3041a8593672fe5928", + }, + type="Retrieval", + category="i2t", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2017-01-01", "2017-12-31"), + domains=["Encyclopaedic"], + task_subtypes=["Image Text Retrieval"], + license="Apache-2.0", + 
annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="created", + bibtex_citation="""@InProceedings{Weyand_2020_CVPR, +author = {Weyand, Tobias and Araujo, Andre and Cao, Bingyi and Sim, Jack}, +title = {Google Landmarks Dataset v2 - A Large-Scale Benchmark for Instance-Level Recognition and Retrieval}, +booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2020} +} + +""", + descriptive_stats={ + "n_samples": {"test": 1972}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 674, + "num_queries": 1972, + "average_relevant_docs_per_query": 1.0, + } + }, + }, + ) diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2TRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2TRetrieval.json new file mode 100644 index 0000000000..5454017683 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2TRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "d8c3e53160860f76de73ed3041a8593672fe5928", + "evaluation_time": 7.942275285720825, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.45833, + "cv_recall_at_10": 0.75939, + "cv_recall_at_100": 0.94894, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 0.82923, + "cv_recall_at_3": 0.61737, + "cv_recall_at_5": 0.67547, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.59091, + "map_at_1": 0.41862, + "map_at_10": 0.53693, + "map_at_100": 0.54643, + "map_at_1000": 0.54678, + "map_at_20": 0.54267, + "map_at_3": 0.50797, + "map_at_5": 0.52367, + "mrr_at_1": 0.4583333333333333, + "mrr_at_10": 0.5535262500931519, + "mrr_at_100": 0.5614522827750004, + "mrr_at_1000": 0.5617553026170138, + "mrr_at_20": 
0.5583628555240837, + "mrr_at_3": 0.528951486697966, + "mrr_at_5": 0.542243740219093, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": 0.4805358684051926, + "nauc_cv_recall_at_100_max": 0.5649594006452747, + "nauc_cv_recall_at_100_std": 0.32360974466378195, + "nauc_cv_recall_at_10_diff1": 0.5013034837210644, + "nauc_cv_recall_at_10_max": 0.4802128509459776, + "nauc_cv_recall_at_10_std": 0.09412450554170432, + "nauc_cv_recall_at_1_diff1": 0.636898011304436, + "nauc_cv_recall_at_1_max": 0.44436198490104667, + "nauc_cv_recall_at_1_std": -0.06652018253703539, + "nauc_cv_recall_at_20_diff1": 0.47564828778050955, + "nauc_cv_recall_at_20_max": 0.5259584288342394, + "nauc_cv_recall_at_20_std": 0.1768940709215821, + "nauc_cv_recall_at_3_diff1": 0.5205869375457675, + "nauc_cv_recall_at_3_max": 0.450051437202706, + "nauc_cv_recall_at_3_std": -0.029414267124329728, + "nauc_cv_recall_at_5_diff1": 0.5001251296481215, + "nauc_cv_recall_at_5_max": 0.4693890552265276, + "nauc_cv_recall_at_5_std": 0.013796336174200773, + "nauc_map_at_1000_diff1": 0.5818434859115754, + "nauc_map_at_1000_max": 0.4430387835643193, + "nauc_map_at_1000_std": -0.04099022306215756, + "nauc_map_at_100_diff1": 0.5817200322199132, + "nauc_map_at_100_max": 0.44314688497711546, + "nauc_map_at_100_std": -0.040679605719069825, + "nauc_map_at_10_diff1": 0.5806678508477698, + "nauc_map_at_10_max": 0.4427501241681824, + "nauc_map_at_10_std": -0.04102488719045883, + "nauc_map_at_1_diff1": 0.6363475951973468, + "nauc_map_at_1_max": 0.3978201419843627, + "nauc_map_at_1_std": -0.07535068057179833, + "nauc_map_at_20_diff1": 0.5811344454158822, + "nauc_map_at_20_max": 0.4440615143544749, + "nauc_map_at_20_std": -0.03955530709812017, + "nauc_map_at_3_diff1": 0.583539903148956, + "nauc_map_at_3_max": 0.4347198346189301, + "nauc_map_at_3_std": -0.05914956536629059, + "nauc_map_at_5_diff1": 0.5808229323614098, + 
"nauc_map_at_5_max": 0.44138202029415413, + "nauc_map_at_5_std": -0.05072257931247744, + "nauc_mrr_at_1000_diff1": 0.5881203904403035, + "nauc_mrr_at_1000_max": 0.4484481284666203, + "nauc_mrr_at_1000_std": -0.041015672924594425, + "nauc_mrr_at_100_diff1": 0.5880202802699318, + "nauc_mrr_at_100_max": 0.4485244130022475, + "nauc_mrr_at_100_std": -0.04073803891469471, + "nauc_mrr_at_10_diff1": 0.5872948949505256, + "nauc_mrr_at_10_max": 0.44803331196596113, + "nauc_mrr_at_10_std": -0.040942756000374045, + "nauc_mrr_at_1_diff1": 0.636898011304436, + "nauc_mrr_at_1_max": 0.44436198490104667, + "nauc_mrr_at_1_std": -0.06652018253703539, + "nauc_mrr_at_20_diff1": 0.5873601429560038, + "nauc_mrr_at_20_max": 0.4491342783654011, + "nauc_mrr_at_20_std": -0.039490970673790156, + "nauc_mrr_at_3_diff1": 0.5876254651867967, + "nauc_mrr_at_3_max": 0.4441519579742525, + "nauc_mrr_at_3_std": -0.05530094979107773, + "nauc_mrr_at_5_diff1": 0.5853848397651132, + "nauc_mrr_at_5_max": 0.44757418986482955, + "nauc_mrr_at_5_std": -0.047728167237815616, + "nauc_ndcg_at_1000_diff1": 0.5746984338116712, + "nauc_ndcg_at_1000_max": 0.44886091973812225, + "nauc_ndcg_at_1000_std": -0.02776328674880611, + "nauc_ndcg_at_100_diff1": 0.5718934034391514, + "nauc_ndcg_at_100_max": 0.4519133095623923, + "nauc_ndcg_at_100_std": -0.01859333456676269, + "nauc_ndcg_at_10_diff1": 0.5648965674150932, + "nauc_ndcg_at_10_max": 0.4509288057994942, + "nauc_ndcg_at_10_std": -0.01760326906724774, + "nauc_ndcg_at_1_diff1": 0.636898011304436, + "nauc_ndcg_at_1_max": 0.44436198490104667, + "nauc_ndcg_at_1_std": -0.06652018253703539, + "nauc_ndcg_at_20_diff1": 0.5653913906886788, + "nauc_ndcg_at_20_max": 0.4567155165752001, + "nauc_ndcg_at_20_std": -0.010364885098699228, + "nauc_ndcg_at_3_diff1": 0.5691469740706386, + "nauc_ndcg_at_3_max": 0.4377098844996301, + "nauc_ndcg_at_3_std": -0.05395678784871217, + "nauc_ndcg_at_5_diff1": 0.5637652891437743, + "nauc_ndcg_at_5_max": 0.44804388763273256, + "nauc_ndcg_at_5_std": 
-0.03867023871561297, + "nauc_precision_at_1000_diff1": -0.1269295688966779, + "nauc_precision_at_1000_max": 0.09576039350822045, + "nauc_precision_at_1000_std": 0.09159920254296734, + "nauc_precision_at_100_diff1": -0.007437154335484724, + "nauc_precision_at_100_max": 0.20039321410244404, + "nauc_precision_at_100_std": 0.1426867944558165, + "nauc_precision_at_10_diff1": 0.2452553975868521, + "nauc_precision_at_10_max": 0.37544079604903624, + "nauc_precision_at_10_std": 0.10584035906560733, + "nauc_precision_at_1_diff1": 0.636898011304436, + "nauc_precision_at_1_max": 0.44436198490104667, + "nauc_precision_at_1_std": -0.06652018253703539, + "nauc_precision_at_20_diff1": 0.16916910475445357, + "nauc_precision_at_20_max": 0.34642136478127744, + "nauc_precision_at_20_std": 0.14261345610270082, + "nauc_precision_at_3_diff1": 0.377439212377142, + "nauc_precision_at_3_max": 0.4175722537265246, + "nauc_precision_at_3_std": -0.001966043546048812, + "nauc_precision_at_5_diff1": 0.32001958998919133, + "nauc_precision_at_5_max": 0.4091917985758222, + "nauc_precision_at_5_std": 0.037734817146087056, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": 0.46741327337617883, + "nauc_recall_at_100_max": 0.5601227994679975, + "nauc_recall_at_100_std": 0.32393562317395125, + "nauc_recall_at_10_diff1": 0.48441585851399666, + "nauc_recall_at_10_max": 0.46628557295962697, + "nauc_recall_at_10_std": 0.08341081487340105, + "nauc_recall_at_1_diff1": 0.6363475951973468, + "nauc_recall_at_1_max": 0.3978201419843627, + "nauc_recall_at_1_std": -0.07535068057179833, + "nauc_recall_at_20_diff1": 0.4621173128401141, + "nauc_recall_at_20_max": 0.5117026275838669, + "nauc_recall_at_20_std": 0.1626250437132122, + "nauc_recall_at_3_diff1": 0.5164962617526161, + "nauc_recall_at_3_max": 0.43204598915930476, + "nauc_recall_at_3_std": -0.04234979378579455, + "nauc_recall_at_5_diff1": 0.49384971068639183, + 
"nauc_recall_at_5_max": 0.4557447504137923, + "nauc_recall_at_5_std": -0.0012052087838667518, + "ndcg_at_1": 0.45833, + "ndcg_at_10": 0.59091, + "ndcg_at_100": 0.6353, + "ndcg_at_1000": 0.64262, + "ndcg_at_20": 0.61038, + "ndcg_at_3": 0.5348, + "ndcg_at_5": 0.56109, + "precision_at_1": 0.45833, + "precision_at_10": 0.08474, + "precision_at_100": 0.01079, + "precision_at_1000": 0.00114, + "precision_at_20": 0.0466, + "precision_at_3": 0.22418, + "precision_at_5": 0.14906, + "recall_at_1": 0.41862, + "recall_at_10": 0.74169, + "recall_at_100": 0.9464, + "recall_at_1000": 1.0, + "recall_at_20": 0.81641, + "recall_at_3": 0.59165, + "recall_at_5": 0.65278 + } + ] + }, + "task_name": "GLDv2I2TRetrieval" +} \ No newline at end of file From f2721c00775374c4df6aa6c840641fbd4e8540e1 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 17 Oct 2024 11:47:22 +0100 Subject: [PATCH 51/73] add gld v2 i2i retrieval --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 1 + .../Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py | 50 +++++ .../GLDv2I2IRetrieval.json | 186 ++++++++++++++++++ 3 files changed, 237 insertions(+) create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2IRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 0c7a44ab52..920e5361ea 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -11,6 +11,7 @@ from .eng.Flickr30kT2IRetrieval import * from .eng.FORBI2IRetrieval import * from .eng.GLDv2I2TRetrieval import * +from .eng.GLDv2I2IRetrieval import * from .eng.HatefulMemesI2TRetrieval import * from .eng.HatefulMemesT2IRetrieval import * from .eng.ImageCoDeT2IRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py 
new file mode 100644 index 0000000000..5ae81959f2 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class GLDv2I2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="GLDv2I2IRetrieval", + description="Retrieve names of landmarks based on their image.", + reference="https://openaccess.thecvf.com/content_CVPR_2020/html/Weyand_Google_Landmarks_Dataset_v2_-_A_Large-Scale_Benchmark_for_Instance-Level_CVPR_2020_paper.html", + dataset={ + "path": "JamieSJS/gld-v2-i2i", + "revision": "b9286d3e5b0404de5fcd9f52cabd88de4968727b", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2017-01-01", "2017-12-31"), + domains=["Encyclopaedic"], + task_subtypes=["Image Text Retrieval"], + license="Apache-2.0", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@InProceedings{Weyand_2020_CVPR, +author = {Weyand, Tobias and Araujo, Andre and Cao, Bingyi and Sim, Jack}, +title = {Google Landmarks Dataset v2 - A Large-Scale Benchmark for Instance-Level Recognition and Retrieval}, +booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2020} +} + +""", + descriptive_stats={ + "n_samples": {"test": 1972}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 1972, + "num_queries": 1972, + "average_relevant_docs_per_query": 5.27, + } + }, + }, + ) diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2IRetrieval.json 
b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2IRetrieval.json new file mode 100644 index 0000000000..7b65577dfd --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "b9286d3e5b0404de5fcd9f52cabd88de4968727b", + "evaluation_time": 15.037300109863281, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 1.0, + "cv_recall_at_10": 1.0, + "cv_recall_at_100": 1.0, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 1.0, + "cv_recall_at_3": 1.0, + "cv_recall_at_5": 1.0, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 1.0, + "map_at_1": 1.0, + "map_at_10": 1.0, + "map_at_100": 1.0, + "map_at_1000": 1.0, + "map_at_20": 1.0, + "map_at_3": 1.0, + "map_at_5": 1.0, + "mrr_at_1": 1.0, + "mrr_at_10": 1.0, + "mrr_at_100": 1.0, + "mrr_at_1000": 1.0, + "mrr_at_20": 1.0, + "mrr_at_3": 1.0, + "mrr_at_5": 1.0, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": NaN, + "nauc_cv_recall_at_100_max": NaN, + "nauc_cv_recall_at_100_std": NaN, + "nauc_cv_recall_at_10_diff1": NaN, + "nauc_cv_recall_at_10_max": NaN, + "nauc_cv_recall_at_10_std": NaN, + "nauc_cv_recall_at_1_diff1": NaN, + "nauc_cv_recall_at_1_max": NaN, + "nauc_cv_recall_at_1_std": NaN, + "nauc_cv_recall_at_20_diff1": NaN, + "nauc_cv_recall_at_20_max": NaN, + "nauc_cv_recall_at_20_std": NaN, + "nauc_cv_recall_at_3_diff1": NaN, + "nauc_cv_recall_at_3_max": NaN, + "nauc_cv_recall_at_3_std": NaN, + "nauc_cv_recall_at_5_diff1": NaN, + "nauc_cv_recall_at_5_max": NaN, + "nauc_cv_recall_at_5_std": NaN, + "nauc_map_at_1000_diff1": NaN, + "nauc_map_at_1000_max": NaN, + "nauc_map_at_1000_std": NaN, + "nauc_map_at_100_diff1": NaN, + "nauc_map_at_100_max": NaN, + "nauc_map_at_100_std": NaN, + 
"nauc_map_at_10_diff1": NaN, + "nauc_map_at_10_max": NaN, + "nauc_map_at_10_std": NaN, + "nauc_map_at_1_diff1": NaN, + "nauc_map_at_1_max": NaN, + "nauc_map_at_1_std": NaN, + "nauc_map_at_20_diff1": NaN, + "nauc_map_at_20_max": NaN, + "nauc_map_at_20_std": NaN, + "nauc_map_at_3_diff1": NaN, + "nauc_map_at_3_max": NaN, + "nauc_map_at_3_std": NaN, + "nauc_map_at_5_diff1": NaN, + "nauc_map_at_5_max": NaN, + "nauc_map_at_5_std": NaN, + "nauc_mrr_at_1000_diff1": NaN, + "nauc_mrr_at_1000_max": NaN, + "nauc_mrr_at_1000_std": NaN, + "nauc_mrr_at_100_diff1": NaN, + "nauc_mrr_at_100_max": NaN, + "nauc_mrr_at_100_std": NaN, + "nauc_mrr_at_10_diff1": NaN, + "nauc_mrr_at_10_max": NaN, + "nauc_mrr_at_10_std": NaN, + "nauc_mrr_at_1_diff1": NaN, + "nauc_mrr_at_1_max": NaN, + "nauc_mrr_at_1_std": NaN, + "nauc_mrr_at_20_diff1": NaN, + "nauc_mrr_at_20_max": NaN, + "nauc_mrr_at_20_std": NaN, + "nauc_mrr_at_3_diff1": NaN, + "nauc_mrr_at_3_max": NaN, + "nauc_mrr_at_3_std": NaN, + "nauc_mrr_at_5_diff1": NaN, + "nauc_mrr_at_5_max": NaN, + "nauc_mrr_at_5_std": NaN, + "nauc_ndcg_at_1000_diff1": NaN, + "nauc_ndcg_at_1000_max": NaN, + "nauc_ndcg_at_1000_std": NaN, + "nauc_ndcg_at_100_diff1": NaN, + "nauc_ndcg_at_100_max": NaN, + "nauc_ndcg_at_100_std": NaN, + "nauc_ndcg_at_10_diff1": NaN, + "nauc_ndcg_at_10_max": NaN, + "nauc_ndcg_at_10_std": NaN, + "nauc_ndcg_at_1_diff1": NaN, + "nauc_ndcg_at_1_max": NaN, + "nauc_ndcg_at_1_std": NaN, + "nauc_ndcg_at_20_diff1": NaN, + "nauc_ndcg_at_20_max": NaN, + "nauc_ndcg_at_20_std": NaN, + "nauc_ndcg_at_3_diff1": NaN, + "nauc_ndcg_at_3_max": NaN, + "nauc_ndcg_at_3_std": NaN, + "nauc_ndcg_at_5_diff1": NaN, + "nauc_ndcg_at_5_max": NaN, + "nauc_ndcg_at_5_std": NaN, + "nauc_precision_at_1000_diff1": 1.0, + "nauc_precision_at_1000_max": 1.0, + "nauc_precision_at_1000_std": 1.0, + "nauc_precision_at_100_diff1": NaN, + "nauc_precision_at_100_max": NaN, + "nauc_precision_at_100_std": NaN, + "nauc_precision_at_10_diff1": 1.0, + "nauc_precision_at_10_max": 1.0, + 
"nauc_precision_at_10_std": 1.0, + "nauc_precision_at_1_diff1": NaN, + "nauc_precision_at_1_max": NaN, + "nauc_precision_at_1_std": NaN, + "nauc_precision_at_20_diff1": 1.0, + "nauc_precision_at_20_max": 1.0, + "nauc_precision_at_20_std": 1.0, + "nauc_precision_at_3_diff1": 1.0, + "nauc_precision_at_3_max": 1.0, + "nauc_precision_at_3_std": 1.0, + "nauc_precision_at_5_diff1": 1.0, + "nauc_precision_at_5_max": 1.0, + "nauc_precision_at_5_std": 1.0, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": NaN, + "nauc_recall_at_100_max": NaN, + "nauc_recall_at_100_std": NaN, + "nauc_recall_at_10_diff1": NaN, + "nauc_recall_at_10_max": NaN, + "nauc_recall_at_10_std": NaN, + "nauc_recall_at_1_diff1": NaN, + "nauc_recall_at_1_max": NaN, + "nauc_recall_at_1_std": NaN, + "nauc_recall_at_20_diff1": NaN, + "nauc_recall_at_20_max": NaN, + "nauc_recall_at_20_std": NaN, + "nauc_recall_at_3_diff1": NaN, + "nauc_recall_at_3_max": NaN, + "nauc_recall_at_3_std": NaN, + "nauc_recall_at_5_diff1": NaN, + "nauc_recall_at_5_max": NaN, + "nauc_recall_at_5_std": NaN, + "ndcg_at_1": 1.0, + "ndcg_at_10": 1.0, + "ndcg_at_100": 1.0, + "ndcg_at_1000": 1.0, + "ndcg_at_20": 1.0, + "ndcg_at_3": 1.0, + "ndcg_at_5": 1.0, + "precision_at_1": 1.0, + "precision_at_10": 0.1, + "precision_at_100": 0.01, + "precision_at_1000": 0.001, + "precision_at_20": 0.05, + "precision_at_3": 0.33333, + "precision_at_5": 0.2, + "recall_at_1": 1.0, + "recall_at_10": 1.0, + "recall_at_100": 1.0, + "recall_at_1000": 1.0, + "recall_at_20": 1.0, + "recall_at_3": 1.0, + "recall_at_5": 1.0 + } + ] + }, + "task_name": "GLDv2I2IRetrieval" +} \ No newline at end of file From c9df7aceb85bd7c2ace9e75bc1476c824fce6920 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 18 Oct 2024 09:52:21 +0100 Subject: [PATCH 52/73] make lint --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 920e5361ea..5ad2bba3bf 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -10,8 +10,8 @@ from .eng.Flickr30kI2TRetrieval import * from .eng.Flickr30kT2IRetrieval import * from .eng.FORBI2IRetrieval import * -from .eng.GLDv2I2TRetrieval import * from .eng.GLDv2I2IRetrieval import * +from .eng.GLDv2I2TRetrieval import * from .eng.HatefulMemesI2TRetrieval import * from .eng.HatefulMemesT2IRetrieval import * from .eng.ImageCoDeT2IRetrieval import * From a730eccbea1f304fda692b3cd09c528ab4f94535 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 18 Oct 2024 15:01:55 +0100 Subject: [PATCH 53/73] add AbsTaskAny2AnyMultiChoice --- .../Image/AbsTaskAny2AnyMultiChoice.py | 450 ++++++++++++++++ mteb/abstasks/__init__.py | 1 + .../Image/Any2AnyMultiChoiceEvaluator.py | 486 ++++++++++++++++++ mteb/evaluation/evaluators/__init__.py | 1 + .../Image/Any2AnyMultiChoice/__init__.py | 3 + .../eng/ImageCoDeT2IMultiChoice.py | 50 ++ mteb/tasks/Image/__init__.py | 1 + mteb/tasks/__init__.py | 1 + .../ImageCoDeT2IMultiChoice.json | 33 ++ 9 files changed, 1026 insertions(+) create mode 100644 mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py create mode 100644 mteb/evaluation/evaluators/Image/Any2AnyMultiChoiceEvaluator.py create mode 100644 mteb/tasks/Image/Any2AnyMultiChoice/__init__.py create mode 100644 mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageCoDeT2IMultiChoice.json diff --git a/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py b/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py new file mode 100644 index 0000000000..ca37645e67 --- /dev/null +++ b/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py @@ -0,0 +1,450 @@ +from __future__ import annotations + +import json +import logging +import 
os +from collections import defaultdict +from pathlib import Path +from time import time +from typing import Any + +import tqdm +from datasets import Features, Value, load_dataset +from PIL import Image + +from ...evaluation.evaluators import Any2AnyMultiChoiceEvaluator +from ...load_results.mteb_results import ScoresDict +from ..AbsTask import AbsTask + +logger = logging.getLogger(__name__) + + +class HFDataLoader: + def __init__( + self, + hf_repo: str | None = None, + hf_repo_qrels: str | None = None, + data_folder: str | None = None, + prefix: str | None = None, + corpus_file: str = "corpus.jsonl", + query_file: str = "queries.jsonl", + qrels_folder: str = "qrels", + qrels_file: str = "", + streaming: bool = False, + keep_in_memory: bool = False, + ): + self.corpus = {} + self.queries = {} + self.qrels = {} + self.hf_repo = hf_repo + if hf_repo: + # By default fetch qrels from same repo not a second repo with "-qrels" like in original + self.hf_repo_qrels = hf_repo_qrels if hf_repo_qrels else hf_repo + else: + # data folder would contain these files: + # (1) fiqa/corpus.jsonl (format: jsonlines) + # (2) fiqa/queries.jsonl (format: jsonlines) + # (3) fiqa/qrels/test.tsv (format: tsv ("\t")) + if prefix: + query_file = prefix + "-" + query_file + qrels_folder = prefix + "-" + qrels_folder + + self.corpus_file = ( + os.path.join(data_folder, corpus_file) if data_folder else corpus_file + ) + self.query_file = ( + os.path.join(data_folder, query_file) if data_folder else query_file + ) + self.qrels_folder = ( + os.path.join(data_folder, qrels_folder) if data_folder else None + ) + self.qrels_file = qrels_file + self.streaming = streaming + self.keep_in_memory = keep_in_memory + + @staticmethod + def check(fIn: str, ext: str): + if not os.path.exists(fIn): + raise ValueError(f"File {fIn} not present! 
Please provide accurate file.") + + if not fIn.endswith(ext): + raise ValueError(f"File {fIn} must be present with extension {ext}") + + def load( + self, split="test" + ) -> tuple[ + dict[str, dict[str, str | Image.Image]], + dict[str, dict[str, str | Image.Image]], + dict[str, dict[str, int]], + ]: + if not self.hf_repo: + self.qrels_file = os.path.join(self.qrels_folder, split + ".tsv") + self.check(fIn=self.corpus_file, ext="jsonl") + self.check(fIn=self.query_file, ext="jsonl") + self.check(fIn=self.qrels_file, ext="tsv") + + if not len(self.corpus): + logger.info("Loading Corpus...") + self._load_corpus() + logger.info("Loaded %d %s Documents.", len(self.corpus), split.upper()) + logger.info("Doc Example: %s", self.corpus[0]) + + if not len(self.queries): + logger.info("Loading Queries...") + self._load_queries(split) + + self._load_qrels(split) + # filter queries with no qrels + qrels_dict = defaultdict(dict) + + def qrels_dict_init(row): + qrels_dict[row["query-id"]][row["corpus-id"]] = int(row["score"]) + + self.qrels.map(qrels_dict_init) + self.qrels = qrels_dict + self.queries = self.queries.filter(lambda x: x["id"] in self.qrels) + logger.info("Loaded %d %s Queries.", len(self.queries), split.upper()) + logger.info("Query Example: %s", self.queries[0]) + + return self.corpus, self.queries, self.qrels + + def load_corpus(self) -> dict[str, dict[str, str]]: + if not self.hf_repo: + self.check(fIn=self.corpus_file, ext="jsonl") + + if not len(self.corpus): + logger.info("Loading Corpus...") + self._load_corpus() + logger.info("Loaded %d %s Documents.", len(self.corpus)) + logger.info("Doc Example: %s", self.corpus[0]) + + return self.corpus + + def _load_corpus(self): + if self.hf_repo: + corpus_ds = load_dataset( + self.hf_repo, + "corpus", + keep_in_memory=self.keep_in_memory, + streaming=self.streaming, + )["corpus"] + else: + corpus_ds = load_dataset( + "json", + data_files=self.corpus_file, + streaming=self.streaming, + 
keep_in_memory=self.keep_in_memory, + ) + self.corpus = corpus_ds + + def _load_queries(self, split): + if self.hf_repo: + queries_ds = load_dataset( + self.hf_repo, + "query", + keep_in_memory=self.keep_in_memory, + streaming=self.streaming, + )[split] + else: + queries_ds = load_dataset( + "json", + data_files=self.query_file, + streaming=self.streaming, + keep_in_memory=self.keep_in_memory, + ) + self.queries = queries_ds + + def _load_qrels(self, split): + if self.hf_repo: + qrels_ds = load_dataset( + self.hf_repo_qrels, + "qrels", + keep_in_memory=self.keep_in_memory, + streaming=self.streaming, + )[split] + else: + qrels_ds = load_dataset( + "csv", + data_files=self.qrels_file, + delimiter="\t", + keep_in_memory=self.keep_in_memory, + ) + + if "Q0" in qrels_ds.column_names: + qrels_ds = qrels_ds.remove_columns("Q0") + features = Features( + { + "query-id": Value("string"), + "corpus-id": Value("string"), + "score": Value("float"), + } + ) + # Some datasets may have extra columns, e.g. `difficulty` in qrels for FORB. + qrels_ds = qrels_ds.select_columns(["query-id", "corpus-id", "score"]).cast( + features + ) + self.qrels = qrels_ds + + +class AbsTaskAny2AnyMultiChoice(AbsTask): + """Abstract class for Any2Any multiple choice experiments + + This is NOT a retrieval task: there is one correct answer among a set of candidates, which are a subset of the corpus, indicated in qrels with a relevance of 0 + + Child-classes must implement the following properties: + + self.corpus: dict[str, dict[str, str]] + Semantically, it should contain dict[split_name, dict[sample_id, dict[str, str]]] + E.g. {"test": {"document_one": {"_id": "d1", "title": "title", "text": "text"}}} + + self.queries: dict[str, dict[str, Union[str, List[str]]]] + Semantically, it should contain dict[split_name, dict[sample_id, str]] or dict[split_name, dict[sample_id, List[str]]] for conversations + E.g. 
{"test": {"q1": "query"}} + or {"test": {"q1": ["turn1", "turn2", "turn3"]}} + + self.relevant_docs: dict[str, dict[str, dict[str, int]]] + Semantically, it should contain dict[split_name, dict[sample_id, dict[doc_id, score]]] + E.g.: {"test": {"q1": {"document_one": 1}}} for hard positive samples (the correct choice) + E.g.: {"test": {"q1": {"document_two": 0}}} for hard negative samples (incorrect choices from the same query) + """ + + ignore_identical_ids: bool = False + skip_first_result: bool = False + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def load_data(self, **kwargs): + if self.data_loaded: + return + self.corpus, self.queries, self.relevant_docs = {}, {}, {} + dataset_path = self.metadata_dict["dataset"]["path"] + + for split in kwargs.get("eval_splits", self.metadata_dict["eval_splits"]): + corpus, queries, qrels = HFDataLoader( + hf_repo=dataset_path, + streaming=False, + keep_in_memory=False, + ).load(split=split) + # directly pass in corpus and queries datasets now to prevent loading into memory + # queries = {query["id"]: query for query in queries} + # corpus = {doc["id"]: doc for doc in corpus} + self.corpus[split], self.queries[split], self.relevant_docs[split] = ( + corpus, + queries, + qrels, + ) + + self.data_loaded = True + + def evaluate( + self, + model, + split: str = "test", + *, + encode_kwargs: dict[str, Any] = {}, + **kwargs, + ): + retriever = Any2AnyMultiChoiceEvaluator( + retriever=model, + task_name=self.metadata.name, + encode_kwargs=encode_kwargs, + **kwargs, + ) + + scores = {} + hf_subsets = list(self.hf_subsets) if self.is_multilingual else ["default"] + + for hf_subset in hf_subsets: + logger.info(f"Subset: {hf_subset}") + + if hf_subset == "default": + corpus, queries, relevant_docs = ( + self.corpus[split], + self.queries[split], + self.relevant_docs[split], + ) + else: + corpus, queries, relevant_docs = ( + self.corpus[hf_subset][split], + self.queries[hf_subset][split], + 
self.relevant_docs[hf_subset][split], + ) + scores[hf_subset] = self._evaluate_subset( + retriever, corpus, queries, relevant_docs, hf_subset, **kwargs + ) + return scores + + def _evaluate_subset( + self, retriever, corpus, queries, relevant_docs, hf_subset: str, **kwargs + ): + start_time = time() + results = retriever(corpus, queries, relevant_docs) + end_time = time() + logger.info(f"Time taken to retrieve: {end_time - start_time:.2f} seconds") + + save_predictions = kwargs.get("save_predictions", False) + export_errors = kwargs.get("export_errors", False) + if save_predictions or export_errors: + output_folder = Path(kwargs.get("output_folder", "results")) + if not os.path.isdir(output_folder): + os.makedirs(output_folder) + + if save_predictions: + top_k = kwargs.get("top_k", None) + if top_k is not None: + for qid in list(results.keys()): + doc_ids = set( + sorted( + results[qid], key=lambda x: results[qid][x], reverse=True + )[:top_k] + ) + results[qid] = { + k: v for k, v in results[qid].items() if k in doc_ids + } + qrels_save_path = ( + output_folder / f"{self.metadata.name}_{hf_subset}_predictions.json" + ) + + with open(qrels_save_path, "w") as f: + json.dump(results, f) + + ndcg, _map, recall, precision, cv_recall, naucs = retriever.evaluate( + relevant_docs, + results, + retriever.k_values, + ignore_identical_ids=self.ignore_identical_ids, + skip_first_result=self.skip_first_result, + ) + mrr, naucs_mrr = retriever.evaluate_custom( + relevant_docs, results, retriever.k_values, "mrr" + ) + scores = { + **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()}, + **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in mrr.items()}, + "accuracy": recall["Recall@1"] + } + self._add_main_score(scores) + + if export_errors: + errors = {} + + top_k = kwargs.get("top_k", 1) + if not save_predictions and top_k == 1: + for qid in results.keys(): + doc_scores = results[qid] + sorted_docs = sorted( + doc_scores.items(), key=lambda x: x[1], reverse=True + )[:top_k] 
+ results[qid] = dict(sorted_docs) + for qid, retrieved_docs in results.items(): + expected_docs = relevant_docs[qid] + false_positives = [ + doc for doc in retrieved_docs if doc not in expected_docs + ] + false_negatives = [ + doc for doc in expected_docs if doc not in retrieved_docs + ] + if false_positives or false_negatives: + errors[qid] = { + "false_positives": false_positives, + "false_negatives": false_negatives, + } + + errors_save_path = ( + output_folder / f"{self.metadata.name}_{hf_subset}_errors.json" + ) + with open(errors_save_path, "w") as f: + json.dump(errors, f) + + return scores + + def _add_main_score(self, scores: ScoresDict) -> None: + scores["main_score"] = scores[self.metadata.main_score] + + def _calculate_metrics_from_split( + self, split: str, hf_subset: str | None = None, compute_overall: bool = False + ): + pass + + def calculate_metadata_metrics(self) -> None: + self.load_data() + + all_details = {} + pbar_split = tqdm.tqdm( + self.metadata_dict["eval_splits"], desc="Processing Splits..." + ) + for split in pbar_split: + pbar_split.set_postfix_str(f"Split: {split}") + print(f"Processing metadata for split {split}") + all_details[split] = {} + if self.is_multilingual: + pbar_lang = tqdm.tqdm( + self.relevant_docs.keys(), desc="Processing Languages..." 
+ ) + for lang in pbar_lang: + pbar_lang.set_postfix_str(f"Language: {lang}") + print(f"Processing metadata for language {lang}") + split_details = process_language( + self.relevant_docs[lang][split], + self.queries[lang][split], + self.corpus[lang][split], + lang, + ) + all_details[split][lang] = split_details + else: + split_details = process_language( + self.relevant_docs[split], self.queries[split], self.corpus[split] + ) + all_details[split] = split_details + + return all_details + + +def process_language(relevant_docs, queries, corpus, lang=None): + """We want to get three pieces of information: + - the number of documents (and their char length) in the corpus + - the number of queries (and their char length) + - the average number of relevant documents per query + """ + query_len, doc_len = calculate_length(queries, corpus) + num_documents = len(corpus) + num_queries = len(queries) + + # number of qrels that are not 0 + num_qrels_non_zero = sum( + sum(1 for doc_id in docs if docs[doc_id] != 0) + for docs in relevant_docs.values() + ) + qrels_per_doc = num_qrels_non_zero / num_queries if num_queries else 0 + + language_description = f" for language {lang}" if lang else "" + print(f"Average document character length{language_description} is {doc_len}") + print(f"Average query character length{language_description} is {query_len}") + print(f"Number of documents{language_description} is {num_documents}") + print(f"Number of queries{language_description} is {num_queries}") + print( + f"Average number of relevant documents per query{language_description} is {qrels_per_doc}" + ) + return { + "average_document_length": doc_len, + "average_query_length": query_len, + "num_documents": num_documents, + "num_queries": num_queries, + "average_relevant_docs_per_query": qrels_per_doc, + } + + +def calculate_length(queries, corpus): + queries_lens = [] + doc_lens = [] + for query in queries.values(): + queries_lens.append(len(query)) + + for doc in corpus.values(): + if 
isinstance(doc, Image.Image): + doc_lens.append(1.0) # for image append 1. Can perhaps be removed. + + doc_len = sum(doc_lens) / len(doc_lens) if doc_lens else 0 + query_len = sum(queries_lens) / len(queries_lens) if queries_lens else 0 + return query_len, doc_len diff --git a/mteb/abstasks/__init__.py b/mteb/abstasks/__init__.py index f188430f48..c557761781 100644 --- a/mteb/abstasks/__init__.py +++ b/mteb/abstasks/__init__.py @@ -13,6 +13,7 @@ from .AbsTaskSpeedTask import * from .AbsTaskSTS import * from .AbsTaskSummarization import * +from .Image.AbsTaskAny2AnyMultiChoice import * from .Image.AbsTaskAny2AnyRetrieval import * from .Image.AbsTaskImageClassification import * from .Image.AbsTaskImageClustering import * diff --git a/mteb/evaluation/evaluators/Image/Any2AnyMultiChoiceEvaluator.py b/mteb/evaluation/evaluators/Image/Any2AnyMultiChoiceEvaluator.py new file mode 100644 index 0000000000..20e8547536 --- /dev/null +++ b/mteb/evaluation/evaluators/Image/Any2AnyMultiChoiceEvaluator.py @@ -0,0 +1,486 @@ +from __future__ import annotations + +import heapq +import io +import json +import logging +import os +from collections import defaultdict +from typing import Any + +import numpy as np +import pytrec_eval +import torch +from datasets import Dataset +from PIL import Image +from torch.utils.data import DataLoader +from torchvision import transforms + +from mteb.encoder_interface import EncoderWithQueryCorpusEncode + +from ..Evaluator import Evaluator +from ..utils import ( + confidence_scores, + cos_sim, + dot_score, + download, + hole, + mrr, + nAUC, + recall_cap, + top_k_accuracy, +) + +os.environ["TOKENIZERS_PARALLELISM"] = "false" + +logger = logging.getLogger(__name__) + +transform = transforms.Compose([transforms.PILToTensor()]) + + +class ImageDataset(torch.utils.data.Dataset): + def __init__(self, hf_dataset, image_column_name: str = "image", transform=None): + self.dataset = hf_dataset + self.transform = transform + self.image_column_name = 
image_column_name + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, idx): + image = self.dataset[idx][self.image_column_name] + if isinstance(image, bytes): + image = Image.open(io.BytesIO(image)) + else: + # Assume the image is already in a usable format (e.g., PIL Image) + image = image + if image.mode != "RGB": + image = image.convert("RGB") + image = self.transform(image) + return image + + +def custom_collate_fn(batch): + return batch + + +# Adapted from https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/retrieval/search/dense/exact_search.py#L12 +class Any2AnyMultiChoiceSearch: + def __init__( + self, + model: EncoderWithQueryCorpusEncode, + encode_kwargs: dict[str, Any] = {}, + corpus_chunk_size: int = 20000, + previous_results: str | None = None, + **kwargs: Any, + ): + # Model is class that provides get_text_embeddings() and get_image_embeddings() + self.model = model + self.encode_kwargs = encode_kwargs + + if "batch_size" not in encode_kwargs: + encode_kwargs["batch_size"] = 128 + + self.score_functions = {"cos_sim": cos_sim, "dot": dot_score} + self.score_function_desc = { + "cos_sim": "Cosine Similarity", + "dot": "Dot Product", + } + self.corpus_chunk_size = corpus_chunk_size + self.previous_results = previous_results + self.batch_size = encode_kwargs.get("batch_size") + self.show_progress_bar = encode_kwargs.get("show_progress_bar") + self.save_corpus_embeddings = kwargs.get("save_corpus_embeddings", False) + self.corpus_embeddings = defaultdict(list) + self.results = {} + + if self.previous_results is not None: + self.previous_results = self.load_results_file() + + def search( + self, + corpus: Dataset, # solve memoery issues + queries: Dataset, # solve memoery issues + qrels: Dataset, + top_k: int, + score_function: str, + return_sorted: bool = False, + **kwargs, + ) -> dict[str, dict[str, float]]: + if score_function not in self.score_functions: + raise ValueError( + f"score function: 
{score_function} must be either (cos_sim) for cosine similarity or (dot) for dot product" + ) + + logger.info("Encoding Queries.") + query_ids = list(queries["id"]) + self.results = {qid: {} for qid in query_ids} + + q_modality = queries[0]["modality"] + + if q_modality == "text": + query_texts = queries["text"] + query_embeddings = self.model.get_text_embeddings( + texts=query_texts, batch_size=self.encode_kwargs["batch_size"] + ) + else: + queries_dataset = ImageDataset( + queries, image_column_name="image", transform=transform + ) + query_image_dataloader = DataLoader( + queries_dataset, + batch_size=self.encode_kwargs["batch_size"], + shuffle=False, + collate_fn=custom_collate_fn, + num_workers=max(1, os.cpu_count() // 2), + ) + if q_modality == "image": + query_embeddings = self.model.get_image_embeddings( + images=query_image_dataloader, + batch_size=self.encode_kwargs["batch_size"], + ) + elif q_modality == "image,text": + query_texts = queries["text"] + query_embeddings = self.model.get_fused_embeddings( + texts=query_texts, + images=query_image_dataloader, + batch_size=self.encode_kwargs["batch_size"], + ) + else: + raise ValueError(f"Unsupported modality: {q_modality}") + + logger.info("Preparing Corpus...") + corpus_ids = list(corpus["id"]) + + corpus_modality = corpus[0]["modality"] + + logger.info("Encoding Corpus in batches... 
Warning: This might take a while!") + logger.info( + f"Scoring Function: {self.score_function_desc[score_function]} ({score_function})" + ) + + result_heaps = {qid: [] for qid in query_ids} + for chunk_start in range(0, len(corpus), self.corpus_chunk_size): + chunk = corpus.select( + range( + chunk_start, min(chunk_start + self.corpus_chunk_size, len(corpus)) + ) + ) + chunk_ids = corpus_ids[chunk_start : chunk_start + self.corpus_chunk_size] + + if corpus_modality == "text": + corpus_texts = chunk["text"] + sub_corpus_embeddings = self.model.get_text_embeddings( + texts=corpus_texts, batch_size=self.encode_kwargs["batch_size"] + ) + else: + corpus_dataset = ImageDataset( + chunk, image_column_name="image", transform=transform + ) + corpus_image_dataloader = DataLoader( + corpus_dataset, + batch_size=self.encode_kwargs["batch_size"], + shuffle=False, + collate_fn=custom_collate_fn, + num_workers=max(1, os.cpu_count() // 2), + ) + if corpus_modality == "image": + sub_corpus_embeddings = self.model.get_image_embeddings( + images=corpus_image_dataloader, + batch_size=self.encode_kwargs["batch_size"], + ) + elif corpus_modality == "image,text": + corpus_texts = chunk["text"] + sub_corpus_embeddings = self.model.get_fused_embeddings( + texts=corpus_texts, + images=corpus_image_dataloader, + batch_size=self.encode_kwargs["batch_size"], + ) + else: + raise ValueError(f"Unsupported modality: {corpus_modality}") + + cos_scores = self.score_functions[score_function]( + query_embeddings, sub_corpus_embeddings + ) + cos_scores[torch.isnan(cos_scores)] = -1 + + for query_idx in range(len(query_embeddings)): + query_id = query_ids[query_idx] + # discount answers which aren't a multiple choice (where there is a qrel entry for both query and corpus id) + for c_idx, c_id in enumerate(chunk_ids): + if c_id not in qrels[query_id]: + cos_scores[query_idx, c_idx] = -1 + + cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk( + cos_scores, + min(top_k, cos_scores.size(1)), + 
dim=1, + largest=True, + sorted=return_sorted, + ) + cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist() + cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist() + + for query_itr in range(len(query_embeddings)): + query_id = query_ids[query_itr] + for sub_corpus_id, score in zip( + cos_scores_top_k_idx[query_itr], cos_scores_top_k_values[query_itr] + ): + corpus_id = chunk_ids[sub_corpus_id] + if len(result_heaps[query_id]) < top_k: + heapq.heappush(result_heaps[query_id], (score, corpus_id)) + else: + heapq.heappushpop(result_heaps[query_id], (score, corpus_id)) + + for qid in result_heaps: + for score, corpus_id in result_heaps[qid]: + self.results[qid][corpus_id] = score + + return self.results + + def load_results_file(self): + # load the first stage results from file in format {qid: {doc_id: score}} + if "https://" in self.previous_results: + # download the file + if not os.path.exists(self.previous_results): + url_descriptor = self.previous_results.split("https://")[-1].replace( + "/", "--" + ) + dest_file = os.path.join( + "results", f"cached_predictions--{url_descriptor}" + ) + os.makedirs(os.path.dirname(os.path.abspath(dest_file)), exist_ok=True) + download(self.previous_results, dest_file) + logger.info( + f"Downloaded the previous results at {self.previous_results} to {dest_file}" + ) + self.previous_results = dest_file + + with open(self.previous_results) as f: + previous_results = json.load(f) + assert isinstance(previous_results, dict) + assert isinstance(previous_results[list(previous_results.keys())[0]], dict) + return previous_results + + +class Any2AnyMultiChoiceEvaluator(Evaluator): + def __init__( + self, + retriever=None, + task_name: str | None = None, + k_values: list[int] = [1, 3, 5, 10, 20, 100, 1000], + score_function: str = "cos_sim", + encode_kwargs: dict[str, Any] = {}, + **kwargs, + ): + super().__init__(**kwargs) + + self.retriever = Any2AnyMultiChoiceSearch( + retriever, encode_kwargs=encode_kwargs, **kwargs + ) + 
self.k_values = k_values + self.top_k = ( + max(k_values) if "top_k" not in kwargs else kwargs["top_k"] + ) # can lower it if reranking + self.score_function = score_function + self.task_name = task_name + + def __call__( + self, + corpus: dict[str, dict[str, str | Image.Image]], + queries: dict[str, dict[str, str | Image.Image]], + qrels: dict[str, dict[str, int]], + ) -> dict[str, dict[str, float]]: + if not self.retriever: + raise ValueError("Model/Technique has not been provided!") + + return self.retriever.search( + corpus, + queries, + qrels, + self.top_k, + self.score_function, + prompt_name=self.task_name, # type: ignore + ) + + @staticmethod + def evaluate( + qrels: dict[str, dict[str, int]], + results: dict[str, dict[str, float]], + k_values: list[int], + ignore_identical_ids: bool = False, + skip_first_result: bool = False, + ) -> tuple[ + dict[str, float], + dict[str, float], + dict[str, float], + dict[str, float], + dict[str, float], + ]: + if ignore_identical_ids: + logger.debug( + "For evaluation, ``ignore_identical_ids=True`` is set to True, the evaluator will ignore identical query and document ids." + ) + # Remove identical ids from results dict + for qid, rels in results.items(): + for pid in list(rels): + if qid == pid: + results[qid].pop(pid) + else: + logger.debug( + "For evaluation, we DO NOT ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=True`` to ignore this." + ) + + all_ndcgs, all_aps, all_recalls, all_precisions, all_cv_recalls = ( + {}, + {}, + {}, + {}, + {}, + ) + + for k in k_values: + all_ndcgs[f"NDCG@{k}"] = [] + all_aps[f"MAP@{k}"] = [] + all_recalls[f"Recall@{k}"] = [] + all_precisions[f"P@{k}"] = [] + all_cv_recalls[f"CV_Recall@{k}"] = [] # (new) CV-style Recall + + map_string = "map_cut." + ",".join([str(k) for k in k_values]) + ndcg_string = "ndcg_cut." + ",".join([str(k) for k in k_values]) + recall_string = "recall." 
+ ",".join([str(k) for k in k_values]) + precision_string = "P." + ",".join([str(k) for k in k_values]) + evaluator = pytrec_eval.RelevanceEvaluator( + qrels, {map_string, ndcg_string, recall_string, precision_string} + ) + scores = evaluator.evaluate(results) + + sorted_results = { + qid: sorted(rels.items(), key=lambda item: item[1], reverse=True) + for qid, rels in results.items() + } + + if skip_first_result: + for qid, rels in sorted_results.items(): + sorted_results[qid].pop(0) + + for query_id in scores.keys(): + top_docs = [ + doc_id for doc_id, _ in sorted_results.get(query_id, []) + ] # Sorted list of doc IDs + # we need to discount qrels that have a ground truth score of 0 + relevant_docs = { + key + for key in qrels.get(query_id, {}).keys() + if qrels[query_id][key] != 0 + } + + for k in k_values: + top_k_docs = top_docs[:k] + all_ndcgs[f"NDCG@{k}"].append(scores[query_id]["ndcg_cut_" + str(k)]) + all_aps[f"MAP@{k}"].append(scores[query_id]["map_cut_" + str(k)]) + all_recalls[f"Recall@{k}"].append(scores[query_id]["recall_" + str(k)]) + all_precisions[f"P@{k}"].append(scores[query_id]["P_" + str(k)]) + + if relevant_docs.intersection(top_k_docs): + all_cv_recalls[f"CV_Recall@{k}"].append(1.0) + else: + all_cv_recalls[f"CV_Recall@{k}"].append(0.0) + + ndcg, _map, recall, precision, cv_recall = ( + all_ndcgs.copy(), + all_aps.copy(), + all_recalls.copy(), + all_precisions.copy(), + all_cv_recalls.copy(), + ) + + for k in k_values: + ndcg[f"NDCG@{k}"] = round(sum(ndcg[f"NDCG@{k}"]) / len(scores), 5) + _map[f"MAP@{k}"] = round(sum(_map[f"MAP@{k}"]) / len(scores), 5) + recall[f"Recall@{k}"] = round(sum(recall[f"Recall@{k}"]) / len(scores), 5) + precision[f"P@{k}"] = round(sum(precision[f"P@{k}"]) / len(scores), 5) + cv_recall[f"CV_Recall@{k}"] = round( + sum(cv_recall[f"CV_Recall@{k}"]) / len(scores), 5 + ) + + naucs = Any2AnyMultiChoiceEvaluator.evaluate_abstention( + results, + {**all_ndcgs, **all_aps, **all_recalls, **all_precisions, **all_cv_recalls}, + 
) + + return ndcg, _map, recall, precision, cv_recall, naucs + + @staticmethod + def evaluate_custom( + qrels: dict[str, dict[str, int]], + results: dict[str, dict[str, float]], + k_values: list[int], + metric: str, + output_type: str = "all", + ) -> tuple[dict[str, float]]: + if metric.lower() in ["mrr", "mrr@k", "mrr_cut"]: + metric_scores = mrr(qrels, results, k_values, output_type) + + elif metric.lower() in ["recall_cap", "r_cap", "r_cap@k"]: + metric_scores = recall_cap(qrels, results, k_values, output_type) + + elif metric.lower() in ["hole", "hole@k"]: + metric_scores = hole(qrels, results, k_values, output_type) + + elif metric.lower() in [ + "acc", + "top_k_acc", + "accuracy", + "accuracy@k", + "top_k_accuracy", + ]: + metric_scores = top_k_accuracy(qrels, results, k_values, output_type) + + naucs = Any2AnyMultiChoiceEvaluator.evaluate_abstention(results, metric_scores) + metric_scores_avg = {k: sum(v) / len(v) for k, v in metric_scores.items()} + + return metric_scores_avg, naucs + + @staticmethod + def evaluate_abstention( + results: dict[str, dict[str, float]], + metric_scores: dict[str, list[float]], + ) -> dict[str, float]: + """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997""" + all_sim_scores = [list(results[qid].values()) for qid in list(results.keys())] + all_conf_scores = [ + confidence_scores(sim_scores) for sim_scores in all_sim_scores + ] + conf_fcts = list(all_conf_scores[0].keys()) + all_conf_scores = { + fct: np.array([x[fct] for x in all_conf_scores]) for fct in conf_fcts + } + metric_scores = {k: np.array(v) for k, v in metric_scores.items()} + naucs = {} + + for metric_name, scores in metric_scores.items(): + for fct, conf_scores in all_conf_scores.items(): + naucs[f"nAUC_{metric_name}_{fct}"] = nAUC(conf_scores, scores) + + return naucs + + @staticmethod + def calculate_cv_style_recall( + qrels: dict[str, dict[str, int]], results: dict[str, dict[str, 
float]], k: int + ) -> dict[str, float]: + """Calculate CV-style recall: Recall is 1 if any relevant document is + retrieved in the top k, otherwise 0. + """ + cv_recalls = {} + for query_id, relevant_docs in qrels.items(): + retrieved_docs = list(results.get(query_id, {}).keys())[ + :k + ] # Retrieve top k documents + if any(doc_id in relevant_docs for doc_id in retrieved_docs): + cv_recalls[query_id] = ( + 1.0 # If any relevant doc is found in top k, recall is 1 + ) + else: + cv_recalls[query_id] = 0.0 # Otherwise, recall is 0 + return cv_recalls diff --git a/mteb/evaluation/evaluators/__init__.py b/mteb/evaluation/evaluators/__init__.py index ce7da0db59..b19cb8e92a 100644 --- a/mteb/evaluation/evaluators/__init__.py +++ b/mteb/evaluation/evaluators/__init__.py @@ -3,6 +3,7 @@ from .BitextMiningEvaluator import * from .ClassificationEvaluator import * from .ClusteringEvaluator import * +from .Image.Any2AnyMultiChoiceEvaluator import * from .Image.Any2AnyRetrievalEvaluator import * from .Image.ClassificationEvaluator import * from .Image.ClusteringEvaluator import * diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/__init__.py b/mteb/tasks/Image/Any2AnyMultiChoice/__init__.py new file mode 100644 index 0000000000..b317e8cabd --- /dev/null +++ b/mteb/tasks/Image/Any2AnyMultiChoice/__init__.py @@ -0,0 +1,3 @@ +from __future__ import annotations + +from .eng.ImageCoDeT2IMultiChoice import * diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py new file mode 100644 index 0000000000..1f00290cdd --- /dev/null +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyMultiChoice import AbsTaskAny2AnyMultiChoice +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ImageCoDeT2IMultiChoice(AbsTaskAny2AnyMultiChoice): + metadata = TaskMetadata( + 
name="ImageCoDeT2IMultiChoice", + description="Identify the correct image from a set of similar images based on a precise caption.", + reference="https://aclanthology.org/2022.acl-long.241.pdf", + dataset={ + "path": "JamieSJS/imagecode-multi", + "revision": "d28adfd8b34fefa546fdf94bdc352622b2575f6c", + }, + type="Retrieval", + category="t2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_1", + date=("2022-05-22", "2022-05-27"), # conference dates + form=["written"], + domains=["Web"], + task_subtypes=["Image Text Retrieval"], + license="CC BY-SA 4.0", + socioeconomic_status="medium", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="found", + bibtex_citation="""@article{krojer2022image, + title={Image retrieval from contextual descriptions}, + author={Krojer, Benno and Adlakha, Vaibhav and Vineet, Vibhav and Goyal, Yash and Ponti, Edoardo and Reddy, Siva}, + journal={arXiv preprint arXiv:2203.15867}, + year={2022} +} +""", + descriptive_stats={ + "n_samples": {"test": 2302}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 23020, + "num_queries": 2302, + "average_relevant_docs_per_query": 1.0, + } + }, + }, + ) diff --git a/mteb/tasks/Image/__init__.py b/mteb/tasks/Image/__init__.py index cf632fe736..317008e107 100644 --- a/mteb/tasks/Image/__init__.py +++ b/mteb/tasks/Image/__init__.py @@ -1,5 +1,6 @@ from __future__ import annotations +from .Any2AnyMultiChoice import * from .Any2AnyRetrieval import * from .Clustering import * from .ImageClassification import * diff --git a/mteb/tasks/__init__.py b/mteb/tasks/__init__.py index 0d7d1d5fc0..8d49517136 100644 --- a/mteb/tasks/__init__.py +++ b/mteb/tasks/__init__.py @@ -3,6 +3,7 @@ from .BitextMining import * from .Classification import * from .Clustering import * +from .Image.Any2AnyMultiChoice import * from .Image.Any2AnyRetrieval import * from .Image.Clustering 
import * from .Image.ImageClassification import * diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageCoDeT2IMultiChoice.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageCoDeT2IMultiChoice.json new file mode 100644 index 0000000000..1f3e5fbd9e --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ImageCoDeT2IMultiChoice.json @@ -0,0 +1,33 @@ +{ + "dataset_revision": "d28adfd8b34fefa546fdf94bdc352622b2575f6c", + "evaluation_time": 459.3987202644348, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.13206, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.13206, + "mrr_at_1": 0.13205907906168549, + "mrr_at_10": 0.32183470550108295, + "mrr_at_100": 0.32183470550108295, + "mrr_at_1000": 0.32183470550108295, + "mrr_at_20": 0.32183470550108295, + "mrr_at_3": 0.2158268172603526, + "mrr_at_5": 0.2607225600926729, + "ndcg_at_1": 0.13206, + "ndcg_at_10": 0.47717, + "ndcg_at_100": 0.47717, + "ndcg_at_1000": 0.47717, + "ndcg_at_20": 0.47717, + "ndcg_at_3": 0.24566, + "ndcg_at_5": 0.32738 + } + ] + }, + "task_name": "ImageCoDeT2IMultiChoice" +} \ No newline at end of file From dab591339c75b5ef2b0fa9691061a188f580499e Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 18 Oct 2024 15:09:32 +0100 Subject: [PATCH 54/73] make lint --- mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py b/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py index ca37645e67..a8d0dde0ea 100644 --- a/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py +++ b/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py @@ -322,7 +322,7 @@ def _evaluate_subset( scores = { **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()}, **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in 
mrr.items()}, - "accuracy": recall["Recall@1"] + "accuracy": recall["Recall@1"], } self._add_main_score(scores) From 1123d535d285b5bc229e72dc7061a726e4698f7c Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Fri, 18 Oct 2024 15:32:10 +0100 Subject: [PATCH 55/73] remove GLDv2I2IRetrieval --- mteb/tasks/Image/Any2AnyRetrieval/__init__.py | 1 - .../Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py | 50 ----- .../GLDv2I2IRetrieval.json | 186 ------------------ 3 files changed, 237 deletions(-) delete mode 100644 mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py delete mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2IRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py index 5ad2bba3bf..0c7a44ab52 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py @@ -10,7 +10,6 @@ from .eng.Flickr30kI2TRetrieval import * from .eng.Flickr30kT2IRetrieval import * from .eng.FORBI2IRetrieval import * -from .eng.GLDv2I2IRetrieval import * from .eng.GLDv2I2TRetrieval import * from .eng.HatefulMemesI2TRetrieval import * from .eng.HatefulMemesT2IRetrieval import * diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py deleted file mode 100644 index 5ae81959f2..0000000000 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import annotations - -from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval -from mteb.abstasks.TaskMetadata import TaskMetadata - - -class GLDv2I2IRetrieval(AbsTaskAny2AnyRetrieval): - metadata = TaskMetadata( - name="GLDv2I2IRetrieval", - description="Retrieve names of landmarks based on their image.", - 
reference="https://openaccess.thecvf.com/content_CVPR_2020/html/Weyand_Google_Landmarks_Dataset_v2_-_A_Large-Scale_Benchmark_for_Instance-Level_CVPR_2020_paper.html", - dataset={ - "path": "JamieSJS/gld-v2-i2i", - "revision": "b9286d3e5b0404de5fcd9f52cabd88de4968727b", - }, - type="Retrieval", - category="i2i", - eval_splits=["test"], - eval_langs=["eng-Latn"], - main_score="ndcg_at_10", - date=("2017-01-01", "2017-12-31"), - domains=["Encyclopaedic"], - task_subtypes=["Image Text Retrieval"], - license="Apache-2.0", - annotations_creators="derived", - dialect=[], - modalities=["image"], - sample_creation="created", - bibtex_citation="""@InProceedings{Weyand_2020_CVPR, -author = {Weyand, Tobias and Araujo, Andre and Cao, Bingyi and Sim, Jack}, -title = {Google Landmarks Dataset v2 - A Large-Scale Benchmark for Instance-Level Recognition and Retrieval}, -booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, -month = {June}, -year = {2020} -} - -""", - descriptive_stats={ - "n_samples": {"test": 1972}, - "avg_character_length": { - "test": { - "average_document_length": 0.0, - "average_query_length": 0.0, - "num_documents": 1972, - "num_queries": 1972, - "average_relevant_docs_per_query": 5.27, - } - }, - }, - ) diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2IRetrieval.json deleted file mode 100644 index 7b65577dfd..0000000000 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/GLDv2I2IRetrieval.json +++ /dev/null @@ -1,186 +0,0 @@ -{ - "dataset_revision": "b9286d3e5b0404de5fcd9f52cabd88de4968727b", - "evaluation_time": 15.037300109863281, - "kg_co2_emissions": null, - "mteb_version": "1.12.90", - "scores": { - "test": [ - { - "cv_recall_at_1": 1.0, - "cv_recall_at_10": 1.0, - "cv_recall_at_100": 1.0, - 
"cv_recall_at_1000": 1.0, - "cv_recall_at_20": 1.0, - "cv_recall_at_3": 1.0, - "cv_recall_at_5": 1.0, - "hf_subset": "default", - "languages": [ - "eng-Latn" - ], - "main_score": 1.0, - "map_at_1": 1.0, - "map_at_10": 1.0, - "map_at_100": 1.0, - "map_at_1000": 1.0, - "map_at_20": 1.0, - "map_at_3": 1.0, - "map_at_5": 1.0, - "mrr_at_1": 1.0, - "mrr_at_10": 1.0, - "mrr_at_100": 1.0, - "mrr_at_1000": 1.0, - "mrr_at_20": 1.0, - "mrr_at_3": 1.0, - "mrr_at_5": 1.0, - "nauc_cv_recall_at_1000_diff1": NaN, - "nauc_cv_recall_at_1000_max": NaN, - "nauc_cv_recall_at_1000_std": NaN, - "nauc_cv_recall_at_100_diff1": NaN, - "nauc_cv_recall_at_100_max": NaN, - "nauc_cv_recall_at_100_std": NaN, - "nauc_cv_recall_at_10_diff1": NaN, - "nauc_cv_recall_at_10_max": NaN, - "nauc_cv_recall_at_10_std": NaN, - "nauc_cv_recall_at_1_diff1": NaN, - "nauc_cv_recall_at_1_max": NaN, - "nauc_cv_recall_at_1_std": NaN, - "nauc_cv_recall_at_20_diff1": NaN, - "nauc_cv_recall_at_20_max": NaN, - "nauc_cv_recall_at_20_std": NaN, - "nauc_cv_recall_at_3_diff1": NaN, - "nauc_cv_recall_at_3_max": NaN, - "nauc_cv_recall_at_3_std": NaN, - "nauc_cv_recall_at_5_diff1": NaN, - "nauc_cv_recall_at_5_max": NaN, - "nauc_cv_recall_at_5_std": NaN, - "nauc_map_at_1000_diff1": NaN, - "nauc_map_at_1000_max": NaN, - "nauc_map_at_1000_std": NaN, - "nauc_map_at_100_diff1": NaN, - "nauc_map_at_100_max": NaN, - "nauc_map_at_100_std": NaN, - "nauc_map_at_10_diff1": NaN, - "nauc_map_at_10_max": NaN, - "nauc_map_at_10_std": NaN, - "nauc_map_at_1_diff1": NaN, - "nauc_map_at_1_max": NaN, - "nauc_map_at_1_std": NaN, - "nauc_map_at_20_diff1": NaN, - "nauc_map_at_20_max": NaN, - "nauc_map_at_20_std": NaN, - "nauc_map_at_3_diff1": NaN, - "nauc_map_at_3_max": NaN, - "nauc_map_at_3_std": NaN, - "nauc_map_at_5_diff1": NaN, - "nauc_map_at_5_max": NaN, - "nauc_map_at_5_std": NaN, - "nauc_mrr_at_1000_diff1": NaN, - "nauc_mrr_at_1000_max": NaN, - "nauc_mrr_at_1000_std": NaN, - "nauc_mrr_at_100_diff1": NaN, - "nauc_mrr_at_100_max": NaN, - 
"nauc_mrr_at_100_std": NaN, - "nauc_mrr_at_10_diff1": NaN, - "nauc_mrr_at_10_max": NaN, - "nauc_mrr_at_10_std": NaN, - "nauc_mrr_at_1_diff1": NaN, - "nauc_mrr_at_1_max": NaN, - "nauc_mrr_at_1_std": NaN, - "nauc_mrr_at_20_diff1": NaN, - "nauc_mrr_at_20_max": NaN, - "nauc_mrr_at_20_std": NaN, - "nauc_mrr_at_3_diff1": NaN, - "nauc_mrr_at_3_max": NaN, - "nauc_mrr_at_3_std": NaN, - "nauc_mrr_at_5_diff1": NaN, - "nauc_mrr_at_5_max": NaN, - "nauc_mrr_at_5_std": NaN, - "nauc_ndcg_at_1000_diff1": NaN, - "nauc_ndcg_at_1000_max": NaN, - "nauc_ndcg_at_1000_std": NaN, - "nauc_ndcg_at_100_diff1": NaN, - "nauc_ndcg_at_100_max": NaN, - "nauc_ndcg_at_100_std": NaN, - "nauc_ndcg_at_10_diff1": NaN, - "nauc_ndcg_at_10_max": NaN, - "nauc_ndcg_at_10_std": NaN, - "nauc_ndcg_at_1_diff1": NaN, - "nauc_ndcg_at_1_max": NaN, - "nauc_ndcg_at_1_std": NaN, - "nauc_ndcg_at_20_diff1": NaN, - "nauc_ndcg_at_20_max": NaN, - "nauc_ndcg_at_20_std": NaN, - "nauc_ndcg_at_3_diff1": NaN, - "nauc_ndcg_at_3_max": NaN, - "nauc_ndcg_at_3_std": NaN, - "nauc_ndcg_at_5_diff1": NaN, - "nauc_ndcg_at_5_max": NaN, - "nauc_ndcg_at_5_std": NaN, - "nauc_precision_at_1000_diff1": 1.0, - "nauc_precision_at_1000_max": 1.0, - "nauc_precision_at_1000_std": 1.0, - "nauc_precision_at_100_diff1": NaN, - "nauc_precision_at_100_max": NaN, - "nauc_precision_at_100_std": NaN, - "nauc_precision_at_10_diff1": 1.0, - "nauc_precision_at_10_max": 1.0, - "nauc_precision_at_10_std": 1.0, - "nauc_precision_at_1_diff1": NaN, - "nauc_precision_at_1_max": NaN, - "nauc_precision_at_1_std": NaN, - "nauc_precision_at_20_diff1": 1.0, - "nauc_precision_at_20_max": 1.0, - "nauc_precision_at_20_std": 1.0, - "nauc_precision_at_3_diff1": 1.0, - "nauc_precision_at_3_max": 1.0, - "nauc_precision_at_3_std": 1.0, - "nauc_precision_at_5_diff1": 1.0, - "nauc_precision_at_5_max": 1.0, - "nauc_precision_at_5_std": 1.0, - "nauc_recall_at_1000_diff1": NaN, - "nauc_recall_at_1000_max": NaN, - "nauc_recall_at_1000_std": NaN, - "nauc_recall_at_100_diff1": NaN, - 
"nauc_recall_at_100_max": NaN, - "nauc_recall_at_100_std": NaN, - "nauc_recall_at_10_diff1": NaN, - "nauc_recall_at_10_max": NaN, - "nauc_recall_at_10_std": NaN, - "nauc_recall_at_1_diff1": NaN, - "nauc_recall_at_1_max": NaN, - "nauc_recall_at_1_std": NaN, - "nauc_recall_at_20_diff1": NaN, - "nauc_recall_at_20_max": NaN, - "nauc_recall_at_20_std": NaN, - "nauc_recall_at_3_diff1": NaN, - "nauc_recall_at_3_max": NaN, - "nauc_recall_at_3_std": NaN, - "nauc_recall_at_5_diff1": NaN, - "nauc_recall_at_5_max": NaN, - "nauc_recall_at_5_std": NaN, - "ndcg_at_1": 1.0, - "ndcg_at_10": 1.0, - "ndcg_at_100": 1.0, - "ndcg_at_1000": 1.0, - "ndcg_at_20": 1.0, - "ndcg_at_3": 1.0, - "ndcg_at_5": 1.0, - "precision_at_1": 1.0, - "precision_at_10": 0.1, - "precision_at_100": 0.01, - "precision_at_1000": 0.001, - "precision_at_20": 0.05, - "precision_at_3": 0.33333, - "precision_at_5": 0.2, - "recall_at_1": 1.0, - "recall_at_10": 1.0, - "recall_at_100": 1.0, - "recall_at_1000": 1.0, - "recall_at_20": 1.0, - "recall_at_3": 1.0, - "recall_at_5": 1.0 - } - ] - }, - "task_name": "GLDv2I2IRetrieval" -} \ No newline at end of file From d0cddd47df2f3712c32b87c887909d7a373041cd Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Sun, 20 Oct 2024 20:34:03 +0100 Subject: [PATCH 56/73] exclude AbsTaskAny2AnyMultiChoice from test_load_data --- tests/test_tasks/test_all_abstasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_tasks/test_all_abstasks.py b/tests/test_tasks/test_all_abstasks.py index c9f1f59ac6..d4c8e44a88 100644 --- a/tests/test_tasks/test_all_abstasks.py +++ b/tests/test_tasks/test_all_abstasks.py @@ -13,6 +13,7 @@ from mteb.abstasks.AbsTaskInstructionRetrieval import AbsTaskInstructionRetrieval from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval from mteb.abstasks.AbsTaskSpeedTask import AbsTaskSpeedTask +from mteb.abstasks.Image.AbsTaskAny2AnyMultiChoice import AbsTaskAny2AnyMultiChoice from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import 
AbsTaskAny2AnyRetrieval from mteb.abstasks.MultiSubsetLoader import MultiSubsetLoader from mteb.overview import TASKS_REGISTRY @@ -39,6 +40,7 @@ def test_load_data( or isinstance(task, AbsTaskInstructionRetrieval) or isinstance(task, MultiSubsetLoader) or isinstance(task, AbsTaskSpeedTask) + or isinstance(task, AbsTaskAny2AnyMultiChoice) ): pytest.skip() with patch.object(task, "dataset_transform") as mock_dataset_transform: From bc8aa0123003e1e77b28214b1268ca50b4a349e3 Mon Sep 17 00:00:00 2001 From: gowitheflow-1998 Date: Mon, 21 Oct 2024 11:27:52 +0100 Subject: [PATCH 57/73] fix e5v&vista --- mteb/models/e5_v.py | 71 +++++++++++++----- mteb/models/vista_models.py | 73 +++++++++++++------ .../model_meta.json | 2 +- .../model_meta.json | 2 +- 4 files changed, 105 insertions(+), 43 deletions(-) diff --git a/mteb/models/e5_v.py b/mteb/models/e5_v.py index b3287f274c..b35ced2b4d 100644 --- a/mteb/models/e5_v.py +++ b/mteb/models/e5_v.py @@ -5,12 +5,12 @@ import torch from PIL import Image +from torch.utils.data import DataLoader from tqdm import tqdm from transformers import LlavaNextForConditionalGeneration, LlavaNextProcessor from mteb.model_meta import ModelMeta - class E5VWrapper: def __init__( self, @@ -56,9 +56,23 @@ def get_text_embeddings(self, texts: list[str], batch_size: int = 8): all_text_embeddings.append(text_outputs.cpu()) return torch.cat(all_text_embeddings, dim=0) - def get_image_embeddings(self, images: list[Image.Image], batch_size: int = 8): + def get_image_embeddings( + self, images: list[Image.Image] | DataLoader, batch_size: int = 8 + ): all_image_embeddings = [] + if isinstance(images, DataLoader): + for batch_images in tqdm(images): + img_inputs = self.processor( + [self.img_prompt] * len(batch_images), + batch_images, + return_tensors="pt", + padding=True, + ).to("cuda") + image_outputs = self.model( + **img_inputs, output_hidden_states=True, return_dict=True + ).hidden_states[-1][:, -1, :] + all_image_embeddings.append(image_outputs.cpu()) 
with torch.no_grad(): for i in tqdm(range(0, len(images), batch_size)): batch_images = images[i : i + batch_size] @@ -95,24 +109,41 @@ def get_fused_embeddings( all_fused_embeddings = [] if texts is not None and images is not None: - if len(texts) != len(images): - raise ValueError( - "The number of texts and images must have the same length" - ) - with torch.no_grad(): - for i in tqdm(range(0, len(images), batch_size)): - batch_texts = texts[i : i + batch_size] - batch_images = images[i : i + batch_size] - prompts = [ - self.composed_prompt.format(text) for text in batch_texts - ] - inputs = self.processor( - prompts, batch_images, return_tensors="pt", padding=True - ).to("cuda") - outputs = self.model( - **inputs, output_hidden_states=True, return_dict=True - ).hidden_states[-1][:, -1, :] - all_fused_embeddings.append(outputs.cpu()) + if isinstance(images, DataLoader): + with torch.no_grad(): + for index, batch_images in enumerate(tqdm(images)): + batch_texts = texts[ + index * batch_size : (index + 1) * batch_size + ] + prompts = [ + self.composed_prompt.format(text) for text in batch_texts + ] + inputs = self.processor( + prompts, batch_images, return_tensors="pt", padding=True + ).to("cuda") + outputs = self.model( + **inputs, output_hidden_states=True, return_dict=True + ).hidden_states[-1][:, -1, :] + all_fused_embeddings.append(outputs.cpu()) + else: + if len(texts) != len(images): + raise ValueError( + "The number of texts and images must have the same length" + ) + with torch.no_grad(): + for i in tqdm(range(0, len(images), batch_size)): + batch_texts = texts[i : i + batch_size] + batch_images = images[i : i + batch_size] + prompts = [ + self.composed_prompt.format(text) for text in batch_texts + ] + inputs = self.processor( + prompts, batch_images, return_tensors="pt", padding=True + ).to("cuda") + outputs = self.model( + **inputs, output_hidden_states=True, return_dict=True + ).hidden_states[-1][:, -1, :] + all_fused_embeddings.append(outputs.cpu()) 
return torch.cat(all_fused_embeddings, dim=0) elif texts is not None: diff --git a/mteb/models/vista_models.py b/mteb/models/vista_models.py index 007bbfe37f..755ebd3798 100644 --- a/mteb/models/vista_models.py +++ b/mteb/models/vista_models.py @@ -4,10 +4,14 @@ import torch from PIL import Image +from torch.utils.data import DataLoader +from torchvision import transforms from tqdm import tqdm from mteb.model_meta import ModelMeta +tensor_to_image = transforms.Compose([transforms.ToPILImage()]) + def vista_loader(**kwargs): try: # a temporal fix for the dependency issues of vista models. @@ -88,15 +92,21 @@ def encode_text(self, texts): t_reps = torch.nn.functional.normalize(t_reps, dim=-1) return t_reps.contiguous() - def encode(self, images=None, texts=None): + def encode(self, images=None, texts=None, tensors=False): if images is not None: if isinstance(images, list): - images = [ - self.preprocess_val( - img if isinstance(img, Image.Image) else Image.open(img) - ) - for img in images - ] + if not tensors: + images = [ + self.preprocess_val( + img if isinstance(img, Image.Image) else Image.open(img) + ) + for img in images + ] + else: + images = [ + self.preprocess_val(tensor_to_image(image)) + for image in images + ] images = torch.stack(images) if texts is not None: texts = self.tokenizer(texts, return_tensors="pt", padding=True) @@ -119,31 +129,52 @@ def get_text_embeddings(self, texts: list[str], batch_size: int = 32): all_text_embeddings.append(batch_embeddings.cpu()) return torch.cat(all_text_embeddings, dim=0) - def get_image_embeddings(self, images: list[Image.Image], batch_size: int = 32): + def get_image_embeddings( + self, images: list[Image.Image] | DataLoader, batch_size: int = 32 + ): all_image_embeddings = [] - for i in tqdm(range(0, len(images), batch_size)): - batch_images = images[i : i + batch_size] + + if isinstance(images, DataLoader): with torch.no_grad(): - batch_embeddings = self.encode(images=batch_images) - 
all_image_embeddings.append(batch_embeddings.cpu()) + for batch in tqdm(images): + batch_embeddings = self.encode(images=batch, tensors=True) + all_image_embeddings.append(batch_embeddings.cpu()) + else: + with torch.no_grad(): + for i in tqdm(range(0, len(images), batch_size)): + batch_images = images[i : i + batch_size] + batch_embeddings = self.encode(images=batch_images) + all_image_embeddings.append(batch_embeddings.cpu()) return torch.cat(all_image_embeddings, dim=0) def get_fused_embeddings( self, texts: list[str] = None, - images: list[Image.Image] = None, + images: list[Image.Image] | DataLoader = None, batch_size: int = 32, ): all_embeddings = [] - assert len(texts) == len(images) - for i in tqdm(range(0, len(texts), batch_size)): - batch_texts = texts[i : i + batch_size] - batch_images = images[i : i + batch_size] + + if isinstance(images, DataLoader): + with torch.no_grad(): + for index, batch_images in enumerate(tqdm(images)): + batch_texts = texts[ + index * batch_size : (index + 1) * batch_size + ] + batch_embeddings = self.encode( + images=batch_images, texts=batch_texts, tensors=True + ) + all_embeddings.append(batch_embeddings.cpu()) + else: + assert len(texts) == len(images) with torch.no_grad(): - batch_embeddings = self.encode( - images=batch_images, texts=batch_texts - ) - all_embeddings.append(batch_embeddings.cpu()) + for i in tqdm(range(0, len(texts), batch_size)): + batch_texts = texts[i : i + batch_size] + batch_images = images[i : i + batch_size] + batch_embeddings = self.encode( + images=batch_images, texts=batch_texts + ) + all_embeddings.append(batch_embeddings.cpu()) return torch.cat(all_embeddings, dim=0) def calculate_probs(self, text_embeddings, image_embeddings): diff --git a/results-mieb/BAAI__bge-visualized-base/98db10b10d22620010d06f11733346e1c98c34aa/model_meta.json b/results-mieb/BAAI__bge-visualized-base/98db10b10d22620010d06f11733346e1c98c34aa/model_meta.json index 746dfa90fd..a73369f513 100644 --- 
a/results-mieb/BAAI__bge-visualized-base/98db10b10d22620010d06f11733346e1c98c34aa/model_meta.json +++ b/results-mieb/BAAI__bge-visualized-base/98db10b10d22620010d06f11733346e1c98c34aa/model_meta.json @@ -1 +1 @@ -{"name": "BAAI/bge-visualized", "revision": "98db10b10d22620010d06f11733346e1c98c34aa", "release_date": "2024-06-06", "languages": ["eng_Latn"], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_source": true, "similarity_fn_name": null, "framework": [], "loader": "VisualizedBGEWrapper"} \ No newline at end of file +{"name": "BAAI/bge-visualized-base", "revision": "98db10b10d22620010d06f11733346e1c98c34aa", "release_date": "2024-06-06", "languages": ["eng_Latn"], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_source": true, "similarity_fn_name": null, "framework": [], "loader": "vista_loader"} \ No newline at end of file diff --git a/results-mieb/BAAI__bge-visualized-m3/98db10b10d22620010d06f11733346e1c98c34aa/model_meta.json b/results-mieb/BAAI__bge-visualized-m3/98db10b10d22620010d06f11733346e1c98c34aa/model_meta.json index 992116b866..a2b05c9208 100644 --- a/results-mieb/BAAI__bge-visualized-m3/98db10b10d22620010d06f11733346e1c98c34aa/model_meta.json +++ b/results-mieb/BAAI__bge-visualized-m3/98db10b10d22620010d06f11733346e1c98c34aa/model_meta.json @@ -1 +1 @@ -{"name": "BAAI/bge-visualized-m3", "revision": "98db10b10d22620010d06f11733346e1c98c34aa", "release_date": "2024-06-06", "languages": ["eng_Latn"], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_source": true, "similarity_fn_name": null, "framework": [], "loader": "VisualizedBGEWrapper"} \ No newline at end of file +{"name": "BAAI/bge-visualized-m3", "revision": "98db10b10d22620010d06f11733346e1c98c34aa", "release_date": "2024-06-06", "languages": ["eng_Latn"], "n_parameters": null, "memory_usage": null, "max_tokens": null, 
"embed_dim": null, "license": null, "open_source": true, "similarity_fn_name": null, "framework": [], "loader": "vista_loader"} \ No newline at end of file From 6d8ed9b954ea87b2bf0dbb64213036813715d015 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 21 Oct 2024 14:29:49 +0100 Subject: [PATCH 58/73] remove duplicate corpus entries from BLINKIT2TRetreival dataset --- .../eng/BLINKIT2TRetrieval.py | 2 +- .../BLINKIT2TRetrieval.json | 342 +++++++++--------- 2 files changed, 172 insertions(+), 172 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py index 2c652c6388..c006d7240d 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py @@ -11,7 +11,7 @@ class BLINKIT2TRetrieval(AbsTaskAny2AnyRetrieval): reference="https://arxiv.org/abs/2404.12390", dataset={ "path": "JamieSJS/blink-it2t", - "revision": "4ab83c87ac5b24e3b730f86d585671493a3a423c", + "revision": "302cf2008f204285985099dcd46425b00356c610", "trust_remote_code": True, }, type="Retrieval", diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TRetrieval.json index d701facb1e..21d3b7f0dc 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TRetrieval.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TRetrieval.json @@ -1,184 +1,184 @@ { - "dataset_revision": "4ab83c87ac5b24e3b730f86d585671493a3a423c", - "evaluation_time": 13.768992900848389, + "dataset_revision": "302cf2008f204285985099dcd46425b00356c610", + "evaluation_time": 11.171209335327148, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "cv_recall_at_1": 0.00093, - 
"cv_recall_at_10": 0.00373, - "cv_recall_at_100": 0.06151, - "cv_recall_at_1000": 0.49301, - "cv_recall_at_20": 0.01025, - "cv_recall_at_3": 0.00093, - "cv_recall_at_5": 0.00093, + "cv_recall_at_1": 0.08947, + "cv_recall_at_10": 0.64678, + "cv_recall_at_100": 1.0, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 0.96645, + "cv_recall_at_3": 0.2479, + "cv_recall_at_5": 0.37651, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.00209, - "map_at_1": 0.00093, - "map_at_10": 0.00139, - "map_at_100": 0.00274, - "map_at_1000": 0.00397, - "map_at_20": 0.00176, - "map_at_3": 0.00093, - "map_at_5": 0.00093, - "mrr_at_1": 0.0009319664492078285, - "mrr_at_10": 0.0013147383837039009, - "mrr_at_100": 0.002685356046665323, - "mrr_at_1000": 0.003948307638249348, - "mrr_at_20": 0.0016977247814228705, - "mrr_at_3": 0.0009319664492078285, - "mrr_at_5": 0.0009319664492078285, - "nauc_cv_recall_at_1000_diff1": 0.21618180421784144, - "nauc_cv_recall_at_1000_max": 0.1966391139887544, - "nauc_cv_recall_at_1000_std": 0.6300616078101204, - "nauc_cv_recall_at_100_diff1": 0.17443594463110845, - "nauc_cv_recall_at_100_max": 0.3594175633623019, - "nauc_cv_recall_at_100_std": 0.5893797181186331, - "nauc_cv_recall_at_10_diff1": -0.05990075938028686, - "nauc_cv_recall_at_10_max": 0.43603863739514936, - "nauc_cv_recall_at_10_std": 0.6462545358013111, - "nauc_cv_recall_at_1_diff1": -0.49952309145315554, - "nauc_cv_recall_at_1_max": 0.6610449563695936, - "nauc_cv_recall_at_1_std": 1.0, - "nauc_cv_recall_at_20_diff1": 0.1175713728405099, - "nauc_cv_recall_at_20_max": 0.36443381136528874, - "nauc_cv_recall_at_20_std": 0.3549079762661633, - "nauc_cv_recall_at_3_diff1": -0.49952309145315554, - "nauc_cv_recall_at_3_max": 0.6610449563695936, - "nauc_cv_recall_at_3_std": 1.0, - "nauc_cv_recall_at_5_diff1": -0.49952309145315554, - "nauc_cv_recall_at_5_max": 0.6610449563695936, - "nauc_cv_recall_at_5_std": 1.0, - "nauc_map_at_1000_diff1": 0.3461462162176906, - "nauc_map_at_1000_max": 
0.30890589543493097, - "nauc_map_at_1000_std": 0.707200194236762, - "nauc_map_at_100_diff1": 0.4005511014650457, - "nauc_map_at_100_max": 0.30811121696833377, - "nauc_map_at_100_std": 0.7144966661138995, - "nauc_map_at_10_diff1": 0.5208347947487291, - "nauc_map_at_10_max": 0.206028460069979, - "nauc_map_at_10_std": 0.8872521734282164, - "nauc_map_at_1_diff1": 0.6610449563695936, - "nauc_map_at_1_max": 0.09839818901557038, - "nauc_map_at_1_std": 1.0, - "nauc_map_at_20_diff1": 0.4542718670585475, - "nauc_map_at_20_max": 0.26815935842012756, - "nauc_map_at_20_std": 0.7972838331802248, - "nauc_map_at_3_diff1": 0.6610449563695936, - "nauc_map_at_3_max": 0.09839818901557038, - "nauc_map_at_3_std": 1.0, - "nauc_map_at_5_diff1": 0.6610449563695936, - "nauc_map_at_5_max": 0.09839818901557038, - "nauc_map_at_5_std": 1.0, - "nauc_mrr_at_1000_diff1": -0.013155580619472381, - "nauc_mrr_at_1000_max": 0.4252494452276833, - "nauc_mrr_at_1000_std": 0.687428882142861, - "nauc_mrr_at_100_diff1": -0.05688061033793547, - "nauc_mrr_at_100_max": 0.4528834485316897, - "nauc_mrr_at_100_std": 0.6942429482097452, - "nauc_mrr_at_10_diff1": -0.3361389575479778, - "nauc_mrr_at_10_max": 0.5699031562786795, - "nauc_mrr_at_10_std": 0.8567106980461006, - "nauc_mrr_at_1_diff1": -0.49952309145315554, - "nauc_mrr_at_1_max": 0.6610449563695936, - "nauc_mrr_at_1_std": 1.0, - "nauc_mrr_at_20_diff1": -0.2128688275475949, - "nauc_mrr_at_20_max": 0.5173348447205537, - "nauc_mrr_at_20_std": 0.6977606046374574, - "nauc_mrr_at_3_diff1": -0.49952309145315554, - "nauc_mrr_at_3_max": 0.6610449563695936, - "nauc_mrr_at_3_std": 1.0, - "nauc_mrr_at_5_diff1": -0.49952309145315554, - "nauc_mrr_at_5_max": 0.6610449563695936, - "nauc_mrr_at_5_std": 1.0, - "nauc_ndcg_at_1000_diff1": 0.2267671022987118, - "nauc_ndcg_at_1000_max": 0.2471557214473064, - "nauc_ndcg_at_1000_std": 0.6348682667422092, - "nauc_ndcg_at_100_diff1": 0.2912265250539601, - "nauc_ndcg_at_100_max": 0.3559110476318641, - "nauc_ndcg_at_100_std": 
0.5901042407967306, - "nauc_ndcg_at_10_diff1": 0.42096720717204206, - "nauc_ndcg_at_10_max": 0.2838485324864617, - "nauc_ndcg_at_10_std": 0.7948899807288888, - "nauc_ndcg_at_1_diff1": 0.6610449563695936, - "nauc_ndcg_at_1_max": 0.09839818901557038, - "nauc_ndcg_at_1_std": 1.0, - "nauc_ndcg_at_20_diff1": 0.3330912830350423, - "nauc_ndcg_at_20_max": 0.36298197701132934, - "nauc_ndcg_at_20_std": 0.6588880474018856, - "nauc_ndcg_at_3_diff1": 0.6610449563695936, - "nauc_ndcg_at_3_max": 0.09839818901557038, - "nauc_ndcg_at_3_std": 1.0, - "nauc_ndcg_at_5_diff1": 0.6610449563695936, - "nauc_ndcg_at_5_max": 0.09839818901557038, - "nauc_ndcg_at_5_std": 1.0, - "nauc_precision_at_1000_diff1": 0.21618180421784106, - "nauc_precision_at_1000_max": 0.19663911398875364, - "nauc_precision_at_1000_std": 0.6300616078101199, - "nauc_precision_at_100_diff1": 0.2633008669431452, - "nauc_precision_at_100_max": 0.35813105058108047, - "nauc_precision_at_100_std": 0.5625569512563295, - "nauc_precision_at_10_diff1": 0.31227200390917714, - "nauc_precision_at_10_max": 0.3688258547365643, - "nauc_precision_at_10_std": 0.6910567239150112, - "nauc_precision_at_1_diff1": 0.6610449563695936, - "nauc_precision_at_1_max": 0.09839818901557038, - "nauc_precision_at_1_std": 1.0, - "nauc_precision_at_20_diff1": 0.24987842391547985, - "nauc_precision_at_20_max": 0.4262053224877857, - "nauc_precision_at_20_std": 0.5596936671568136, - "nauc_precision_at_3_diff1": 0.6610449563695937, - "nauc_precision_at_3_max": 0.09839818901557038, - "nauc_precision_at_3_std": 1.0, - "nauc_precision_at_5_diff1": 0.6610449563695937, - "nauc_precision_at_5_max": 0.09839818901557036, - "nauc_precision_at_5_std": 1.0, - "nauc_recall_at_1000_diff1": 0.21618180421784144, - "nauc_recall_at_1000_max": 0.1966391139887544, - "nauc_recall_at_1000_std": 0.6300616078101204, - "nauc_recall_at_100_diff1": 0.2633008669431453, - "nauc_recall_at_100_max": 0.3581310505810804, - "nauc_recall_at_100_std": 0.5625569512563297, - 
"nauc_recall_at_10_diff1": 0.3122720039091771, - "nauc_recall_at_10_max": 0.3688258547365644, - "nauc_recall_at_10_std": 0.691056723915011, - "nauc_recall_at_1_diff1": 0.6610449563695936, - "nauc_recall_at_1_max": 0.09839818901557038, - "nauc_recall_at_1_std": 1.0, - "nauc_recall_at_20_diff1": 0.24987842391547993, - "nauc_recall_at_20_max": 0.4262053224877856, - "nauc_recall_at_20_std": 0.5596936671568133, - "nauc_recall_at_3_diff1": 0.6610449563695936, - "nauc_recall_at_3_max": 0.09839818901557038, - "nauc_recall_at_3_std": 1.0, - "nauc_recall_at_5_diff1": 0.6610449563695936, - "nauc_recall_at_5_max": 0.09839818901557038, - "nauc_recall_at_5_std": 1.0, - "ndcg_at_1": 0.00093, - "ndcg_at_10": 0.00209, - "ndcg_at_100": 0.0128, - "ndcg_at_1000": 0.06231, - "ndcg_at_20": 0.00349, - "ndcg_at_3": 0.00093, - "ndcg_at_5": 0.00093, - "precision_at_1": 0.00093, - "precision_at_10": 0.00047, - "precision_at_100": 0.00065, - "precision_at_1000": 0.00049, - "precision_at_20": 0.00051, - "precision_at_3": 0.00031, - "precision_at_5": 0.00019, - "recall_at_1": 0.00093, - "recall_at_10": 0.00466, - "recall_at_100": 0.06524, - "recall_at_1000": 0.49301, - "recall_at_20": 0.01025, - "recall_at_3": 0.00093, - "recall_at_5": 0.00093 + "main_score": 0.32202, + "map_at_1": 0.08947, + "map_at_10": 0.22464, + "map_at_100": 0.24864, + "map_at_1000": 0.24864, + "map_at_20": 0.24715, + "map_at_3": 0.16061, + "map_at_5": 0.18987, + "mrr_at_1": 0.08946877912395154, + "mrr_at_10": 0.2246397875708215, + "mrr_at_100": 0.24864178823066596, + "mrr_at_1000": 0.24864178823066596, + "mrr_at_20": 0.24714520392298733, + "mrr_at_3": 0.16060888474681598, + "mrr_at_5": 0.18987263125194162, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": NaN, + "nauc_cv_recall_at_100_max": NaN, + "nauc_cv_recall_at_100_std": NaN, + "nauc_cv_recall_at_10_diff1": 0.6643245257587762, + "nauc_cv_recall_at_10_max": 
0.3394378418139761, + "nauc_cv_recall_at_10_std": 0.5631230256620064, + "nauc_cv_recall_at_1_diff1": 0.3501554820571643, + "nauc_cv_recall_at_1_max": 0.3599782655731798, + "nauc_cv_recall_at_1_std": 0.7041481187202615, + "nauc_cv_recall_at_20_diff1": 0.5587451940825555, + "nauc_cv_recall_at_20_max": 0.47434883444027837, + "nauc_cv_recall_at_20_std": 0.6875358512675467, + "nauc_cv_recall_at_3_diff1": 0.5321534332531633, + "nauc_cv_recall_at_3_max": 0.3540044845277292, + "nauc_cv_recall_at_3_std": 0.7076694823976849, + "nauc_cv_recall_at_5_diff1": 0.6570524014466579, + "nauc_cv_recall_at_5_max": 0.3001299949877772, + "nauc_cv_recall_at_5_std": 0.6030407237879231, + "nauc_map_at_1000_diff1": 0.4915782753126093, + "nauc_map_at_1000_max": 0.3523024760071231, + "nauc_map_at_1000_std": 0.6868659279458145, + "nauc_map_at_100_diff1": 0.4915782753126093, + "nauc_map_at_100_max": 0.3523024760071231, + "nauc_map_at_100_std": 0.6868659279458145, + "nauc_map_at_10_diff1": 0.507291819494973, + "nauc_map_at_10_max": 0.34661422520261387, + "nauc_map_at_10_std": 0.6748191769380557, + "nauc_map_at_1_diff1": 0.3501554820571643, + "nauc_map_at_1_max": 0.3599782655731798, + "nauc_map_at_1_std": 0.7041481187202615, + "nauc_map_at_20_diff1": 0.49198266429986276, + "nauc_map_at_20_max": 0.35311180756517996, + "nauc_map_at_20_std": 0.6868445762814864, + "nauc_map_at_3_diff1": 0.4648021929978972, + "nauc_map_at_3_max": 0.3663517861281038, + "nauc_map_at_3_std": 0.7205919836574776, + "nauc_map_at_5_diff1": 0.5087022522085294, + "nauc_map_at_5_max": 0.3467565269949981, + "nauc_map_at_5_std": 0.683363887437365, + "nauc_mrr_at_1000_diff1": 0.4915782753126093, + "nauc_mrr_at_1000_max": 0.3523024760071231, + "nauc_mrr_at_1000_std": 0.6868659279458145, + "nauc_mrr_at_100_diff1": 0.4915782753126093, + "nauc_mrr_at_100_max": 0.3523024760071231, + "nauc_mrr_at_100_std": 0.6868659279458145, + "nauc_mrr_at_10_diff1": 0.507291819494973, + "nauc_mrr_at_10_max": 0.34661422520261387, + "nauc_mrr_at_10_std": 
0.6748191769380557, + "nauc_mrr_at_1_diff1": 0.3501554820571643, + "nauc_mrr_at_1_max": 0.3599782655731798, + "nauc_mrr_at_1_std": 0.7041481187202615, + "nauc_mrr_at_20_diff1": 0.49198266429986276, + "nauc_mrr_at_20_max": 0.35311180756517996, + "nauc_mrr_at_20_std": 0.6868445762814864, + "nauc_mrr_at_3_diff1": 0.4648021929978972, + "nauc_mrr_at_3_max": 0.3663517861281038, + "nauc_mrr_at_3_std": 0.7205919836574776, + "nauc_mrr_at_5_diff1": 0.5087022522085294, + "nauc_mrr_at_5_max": 0.3467565269949981, + "nauc_mrr_at_5_std": 0.683363887437365, + "nauc_ndcg_at_1000_diff1": 0.5054976057073902, + "nauc_ndcg_at_1000_max": 0.3504516506472678, + "nauc_ndcg_at_1000_std": 0.6795326611960923, + "nauc_ndcg_at_100_diff1": 0.5054976057073902, + "nauc_ndcg_at_100_max": 0.3504516506472678, + "nauc_ndcg_at_100_std": 0.6795326611960923, + "nauc_ndcg_at_10_diff1": 0.5529277638560192, + "nauc_ndcg_at_10_max": 0.3418676893611852, + "nauc_ndcg_at_10_std": 0.6438431211574437, + "nauc_ndcg_at_1_diff1": 0.3501554820571643, + "nauc_ndcg_at_1_max": 0.3599782655731798, + "nauc_ndcg_at_1_std": 0.7041481187202615, + "nauc_ndcg_at_20_diff1": 0.5075033561523076, + "nauc_ndcg_at_20_max": 0.35530204332453247, + "nauc_ndcg_at_20_std": 0.6798053401234159, + "nauc_ndcg_at_3_diff1": 0.48649313476516426, + "nauc_ndcg_at_3_max": 0.36277467680108344, + "nauc_ndcg_at_3_std": 0.7169907526506151, + "nauc_ndcg_at_5_diff1": 0.5565020379741223, + "nauc_ndcg_at_5_max": 0.33223528863871, + "nauc_ndcg_at_5_std": 0.6584725779305518, + "nauc_precision_at_1000_diff1": 1.0, + "nauc_precision_at_1000_max": 1.0, + "nauc_precision_at_1000_std": 1.0, + "nauc_precision_at_100_diff1": NaN, + "nauc_precision_at_100_max": NaN, + "nauc_precision_at_100_std": NaN, + "nauc_precision_at_10_diff1": 0.6643245257587758, + "nauc_precision_at_10_max": 0.3394378418139761, + "nauc_precision_at_10_std": 0.563123025662006, + "nauc_precision_at_1_diff1": 0.3501554820571643, + "nauc_precision_at_1_max": 0.3599782655731798, + 
"nauc_precision_at_1_std": 0.7041481187202615, + "nauc_precision_at_20_diff1": 0.5587451940825576, + "nauc_precision_at_20_max": 0.47434883444027603, + "nauc_precision_at_20_std": 0.6875358512675491, + "nauc_precision_at_3_diff1": 0.5321534332531633, + "nauc_precision_at_3_max": 0.3540044845277291, + "nauc_precision_at_3_std": 0.7076694823976849, + "nauc_precision_at_5_diff1": 0.6570524014466578, + "nauc_precision_at_5_max": 0.30012999498777754, + "nauc_precision_at_5_std": 0.6030407237879232, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": NaN, + "nauc_recall_at_100_max": NaN, + "nauc_recall_at_100_std": NaN, + "nauc_recall_at_10_diff1": 0.6643245257587762, + "nauc_recall_at_10_max": 0.3394378418139761, + "nauc_recall_at_10_std": 0.5631230256620064, + "nauc_recall_at_1_diff1": 0.3501554820571643, + "nauc_recall_at_1_max": 0.3599782655731798, + "nauc_recall_at_1_std": 0.7041481187202615, + "nauc_recall_at_20_diff1": 0.5587451940825555, + "nauc_recall_at_20_max": 0.47434883444027837, + "nauc_recall_at_20_std": 0.6875358512675467, + "nauc_recall_at_3_diff1": 0.5321534332531633, + "nauc_recall_at_3_max": 0.3540044845277292, + "nauc_recall_at_3_std": 0.7076694823976849, + "nauc_recall_at_5_diff1": 0.6570524014466579, + "nauc_recall_at_5_max": 0.3001299949877772, + "nauc_recall_at_5_std": 0.6030407237879231, + "ndcg_at_1": 0.08947, + "ndcg_at_10": 0.32202, + "ndcg_at_100": 0.41063, + "ndcg_at_1000": 0.41063, + "ndcg_at_20": 0.40326, + "ndcg_at_3": 0.18308, + "ndcg_at_5": 0.23594, + "precision_at_1": 0.08947, + "precision_at_10": 0.06468, + "precision_at_100": 0.01, + "precision_at_1000": 0.001, + "precision_at_20": 0.04832, + "precision_at_3": 0.08263, + "precision_at_5": 0.0753, + "recall_at_1": 0.08947, + "recall_at_10": 0.64678, + "recall_at_100": 1.0, + "recall_at_1000": 1.0, + "recall_at_20": 0.96645, + "recall_at_3": 0.2479, + "recall_at_5": 0.37651 } ] }, From 
53065b8efd70eb6527f8138409ca901985779063 Mon Sep 17 00:00:00 2001 From: gowitheflow-1998 Date: Mon, 21 Oct 2024 14:32:56 +0100 Subject: [PATCH 59/73] task type fix for running tasks --- mteb/abstasks/TaskMetadata.py | 10 ++++++-- mteb/models/e5_v.py | 1 + mteb/models/vista_models.py | 2 ++ .../eng/ImageCoDeT2IMultiChoice.py | 2 +- .../eng/BLINKIT2IRetrieval.py | 2 +- .../eng/BLINKIT2TRetrieval.py | 2 +- .../Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py | 2 +- .../eng/CUB200I2IRetrieval.py | 2 +- .../Any2AnyRetrieval/eng/FORBI2IRetrieval.py | 2 +- .../eng/Fashion200kI2TRetrieval.py | 2 +- .../eng/Fashion200kT2IRetrieval.py | 2 +- .../eng/FashionIQIT2IRetrieval.py | 2 +- .../eng/Flickr30kI2TRetrieval.py | 2 +- .../eng/Flickr30kT2IRetrieval.py | 2 +- .../Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py | 2 +- .../eng/HatefulMemesI2TRetrieval.py | 2 +- .../eng/HatefulMemesT2IRetrieval.py | 2 +- .../eng/ImageCoDeT2IRetrieval.py | 2 +- .../eng/InfoSeekIT2ITRetrieval.py | 2 +- .../eng/InfoSeekIT2TRetrieval.py | 2 +- .../Any2AnyRetrieval/eng/METI2IRetrieval.py | 2 +- .../eng/MSCOCOI2TRetrieval.py | 2 +- .../eng/MSCOCOT2IRetrieval.py | 2 +- .../eng/MemotionI2TRetrieval.py | 2 +- .../eng/MemotionT2IRetrieval.py | 2 +- .../eng/NIGHTSI2IRetrieval.py | 2 +- .../eng/OVENIT2ITRetrieval.py | 2 +- .../Any2AnyRetrieval/eng/OVENIT2TRetrieval.py | 2 +- .../eng/ROxfordI2IRetrieval.py | 2 +- .../Any2AnyRetrieval/eng/RP2kI2IRetrieval.py | 2 +- .../eng/RParisI2IRetrieval.py | 2 +- .../Any2AnyRetrieval/eng/SOPI2IRetrieval.py | 2 +- .../eng/SciMMIRI2TRetrieval.py | 2 +- .../eng/SciMMIRT2IRetrieval.py | 2 +- .../eng/SketchyI2IRetrieval.py | 2 +- .../eng/StanfordCarsI2IRetrieval.py | 2 +- .../eng/TUBerlinT2IRetrieval.py | 2 +- .../Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py | 2 +- .../eng/VidoreBenchRetrieval.py | 2 +- .../eng/VisualNewsI2TRetrieval.py | 2 +- .../eng/VisualNewsT2IRetrieval.py | 2 +- .../eng/VizWizIT2TRetrieval.py | 2 +- .../eng/WebQAT2ITRetrieval.py | 2 +- 
.../Any2AnyRetrieval/eng/WebQAT2TRetrieval.py | 2 +- .../multilingual/WITT2IRetrieval.py | 2 +- .../multilingual/XFlickr30kCoT2IRetrieval.py | 2 +- .../multilingual/XM3600T2IRetrieval.py | 2 +- .../Any2TextMultipleChoice/eng/CVBench.py | 24 +++++++++---------- mteb/tasks/Image/Clustering/eng/CIFAR.py | 2 +- mteb/tasks/Image/Clustering/eng/ImageNet.py | 2 +- .../Image/Clustering/eng/TinyImageNet.py | 2 +- .../eng/BirdsnapClassification.py | 2 +- .../Image/ImageClassification/eng/CIFAR.py | 2 +- .../eng/Caltech101Classification.py | 2 +- .../eng/Country211Classification.py | 2 +- .../eng/DTDClassification.py | 2 +- .../eng/EuroSATClassification.py | 2 +- .../eng/FER2013Classification.py | 2 +- .../eng/FGVCAircraftClassification.py | 2 +- .../eng/Food101Classification.py | 2 +- .../eng/GTSRBClassification.py | 2 +- .../ImageClassification/eng/Imagenet1k.py | 2 +- .../eng/MNISTClassification.py | 2 +- .../eng/OxfordFlowersClassification.py | 2 +- .../eng/OxfordPetsClassification.py | 2 +- .../eng/PatchCamelyonClassification.py | 2 +- .../eng/RESISC45Classification.py | 2 +- .../eng/STL10Classification.py | 2 +- .../eng/SUN397Classification.py | 2 +- .../eng/StanfordCarsClassification.py | 2 +- .../eng/UCF101Classification.py | 2 +- .../eng/PascalVOC2007.py | 2 +- .../Image/VisualSTS/en/STS12VisualSTS.py | 2 +- .../Image/VisualSTS/en/STS13VisualSTS.py | 2 +- .../Image/VisualSTS/en/STS14VisualSTS.py | 2 +- .../Image/VisualSTS/en/STS15VisualSTS.py | 2 +- .../Image/VisualSTS/en/STS16VisualSTS.py | 2 +- .../STS17MultilingualVisualSTS.py | 2 +- .../STSBenchmarkMultilingualVisualSTS.py | 2 +- .../ZeroshotClassification/eng/Country211.py | 2 +- .../ZeroshotClassification/eng/Imagenet1k.py | 2 +- .../ZeroshotClassification/eng/UCF101.py | 2 +- 82 files changed, 101 insertions(+), 92 deletions(-) diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 09582d779b..c7b6839015 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ 
-98,9 +98,15 @@ "Summarization", "InstructionRetrieval", "Speed", - "ZeroShotClassification", - "ImageTextPairClassification", + "Any2AnyMultiChoice", + "Any2AnyRetrieval", "Any2TextMutipleChoice", + "ImageClustering", + "ImageClassification", + "ImageMultilabelClassification", + "ImageTextPairClassification", + "VisualSTS", + "ZeroShotClassification", ] TASK_CATEGORY = Literal[ diff --git a/mteb/models/e5_v.py b/mteb/models/e5_v.py index b35ced2b4d..70bc20cabf 100644 --- a/mteb/models/e5_v.py +++ b/mteb/models/e5_v.py @@ -11,6 +11,7 @@ from mteb.model_meta import ModelMeta + class E5VWrapper: def __init__( self, diff --git a/mteb/models/vista_models.py b/mteb/models/vista_models.py index 755ebd3798..c86fdcd5b6 100644 --- a/mteb/models/vista_models.py +++ b/mteb/models/vista_models.py @@ -1,6 +1,7 @@ from __future__ import annotations from functools import partial +from typing import Any import torch from PIL import Image @@ -31,6 +32,7 @@ def __init__( negatives_cross_device: bool = False, temperature: float = 0.02, from_pretrained=None, + **kwargs: Any, ): super().__init__( model_name_bge=model_name_bge, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py index 1f00290cdd..46fbb5b990 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py @@ -13,7 +13,7 @@ class ImageCoDeT2IMultiChoice(AbsTaskAny2AnyMultiChoice): "path": "JamieSJS/imagecode-multi", "revision": "d28adfd8b34fefa546fdf94bdc352622b2575f6c", }, - type="Retrieval", + type="Any2AnyMultiChoice", category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py index ca265fd898..cd1ce67e4a 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py +++ 
b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py @@ -14,7 +14,7 @@ class BLINKIT2IRetrieval(AbsTaskAny2AnyRetrieval): "revision": "359b66f11c25d19bc8f7108d98e660a5857f3d26", "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="it2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py index 2c652c6388..d0fb86ba90 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py @@ -14,7 +14,7 @@ class BLINKIT2TRetrieval(AbsTaskAny2AnyRetrieval): "revision": "4ab83c87ac5b24e3b730f86d585671493a3a423c", "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="it2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py index 2e45933ea3..b215dfda06 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py @@ -14,7 +14,7 @@ class CIRRIT2IRetrieval(AbsTaskAny2AnyRetrieval): "revision": "503301cd99348035b9675883a543aa1ded0cf07c", "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="it2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/CUB200I2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/CUB200I2IRetrieval.py index c33f706c63..048f5a33bf 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/CUB200I2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/CUB200I2IRetrieval.py @@ -13,7 +13,7 @@ class CUB200I2I(AbsTaskAny2AnyRetrieval): "path": "isaacchung/cub200_retrieval", "revision": "ad08c1307b15a226bf1b64e62656a17f1f85f7ec", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2i", 
eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/FORBI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/FORBI2IRetrieval.py index 051aa324e6..ffc5ce14bf 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/FORBI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/FORBI2IRetrieval.py @@ -13,7 +13,7 @@ class FORBI2I(AbsTaskAny2AnyRetrieval): "path": "isaacchung/forb_retrieval", "revision": "336607d5bcc853fb7f7276c2c9721d4b5b1ca8e4", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kI2TRetrieval.py index 3e24c8691f..04fad6e352 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kI2TRetrieval.py @@ -14,7 +14,7 @@ class Fashion200kI2TRetrieval(AbsTaskAny2AnyRetrieval): "revision": "96a313715ecf67f5dfe70c4fa52406bc7bdfbeee", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kT2IRetrieval.py index f54a3a38b2..54a1c24cf1 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kT2IRetrieval.py @@ -14,7 +14,7 @@ class Fashion200kT2IRetrieval(AbsTaskAny2AnyRetrieval): "revision": "1b86e2dde50e671d5c83d07a79e8b1d8c696964b", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py index 6072354fe6..45b8e10576 100644 --- 
a/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py @@ -14,7 +14,7 @@ class FashionIQIT2IRetrieval(AbsTaskAny2AnyRetrieval): "revision": "e6f0ec70becc413d940cd62b2cfa3b1d3a08c31a", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="it2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py index f7278bcf37..267cf860b9 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py @@ -13,7 +13,7 @@ class Flickr30kI2TRetrieval(AbsTaskAny2AnyRetrieval): "path": "JamieSJS/flickr30k", "revision": "a4cf34ac79215f9e2cd6a10342d84f606fc41cc3", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py index 44164c90b6..576e6afa50 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py @@ -13,7 +13,7 @@ class Flickr30kT2IRetrieval(AbsTaskAny2AnyRetrieval): "path": "JamieSJS/flickr30k", "revision": "a4cf34ac79215f9e2cd6a10342d84f606fc41cc3", }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py index 9539ef31b3..67b238a470 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py @@ -13,7 +13,7 @@ class GLDv2I2TRetrieval(AbsTaskAny2AnyRetrieval): "path": "JamieSJS/gld-v2-i2t", "revision": 
"d8c3e53160860f76de73ed3041a8593672fe5928", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py index c92a497914..40323d3636 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py @@ -68,7 +68,7 @@ class HatefulMemesI2TRetrieval(AbsTaskAny2AnyRetrieval): "revision": "c9a9a6c3ef0765622a6de0af6ebb68f323ad73ba", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py index 067396752a..fec70177db 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py @@ -68,7 +68,7 @@ class HatefulMemesT2IRetrieval(AbsTaskAny2AnyRetrieval): "revision": "c9a9a6c3ef0765622a6de0af6ebb68f323ad73ba", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py index 3fae916f6b..6e34640459 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py @@ -13,7 +13,7 @@ class ImageCoDeT2IRetrieval(AbsTaskAny2AnyRetrieval): "path": "JamieSJS/imagecode", "revision": "a424cd523ffb157b69a875fb5e71c1d51be54089", }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git 
a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py index e35da59fcb..abc71666aa 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py @@ -14,7 +14,7 @@ class InfoSeekIT2ITRetrieval(AbsTaskAny2AnyRetrieval): "revision": "78ee7f7708aac75d3afac5dcab1c9e03cb62664c", "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="it2it", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py index 4d88a7ac80..a856969e75 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py @@ -14,7 +14,7 @@ class InfoSeekIT2TRetrieval(AbsTaskAny2AnyRetrieval): "revision": "d4f4606f7a42bbf311c2957419ef3734fe81c47f", "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="it2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py index e46b2635e5..b0578c5944 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py @@ -13,7 +13,7 @@ class METI2IRetrieval(AbsTaskAny2AnyRetrieval): "path": "JamieSJS/met", "revision": "08ceaa61c0d172214abb3b8e82971d8f69d2aec0", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOI2TRetrieval.py index dff57f5a53..8652b2e8e0 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOI2TRetrieval.py +++ 
b/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOI2TRetrieval.py @@ -14,7 +14,7 @@ class MSCOCOI2TRetrieval(AbsTaskAny2AnyRetrieval): "revision": "cca3a3e223763e6519a4d68936bc9279034d75d2", "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOT2IRetrieval.py index 9ce5fd839e..4797e98911 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOT2IRetrieval.py @@ -14,7 +14,7 @@ class MSCOCOT2IRetrieval(AbsTaskAny2AnyRetrieval): "revision": "cfe15bd2791dde5f8f20aebecf0b4eb3812972d6", "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py index 5eda9cd295..c9d671d9d6 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py @@ -97,7 +97,7 @@ class MemotionI2TRetrieval(AbsTaskAny2AnyRetrieval): "revision": "cdb15b61d84d56db73e0e59535dfea81ea3c22f4", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py index b82b6367a5..331e628f24 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py @@ -96,7 +96,7 @@ class MemotionT2IRetrieval(AbsTaskAny2AnyRetrieval): "revision": "cdb15b61d84d56db73e0e59535dfea81ea3c22f4", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", 
category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py index 3c7798c77c..ae0d91a6b5 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py @@ -13,7 +13,7 @@ class NIGHTSI2IRetrieval(AbsTaskAny2AnyRetrieval): "path": "MRBench/mbeir_nights_task4", "revision": "c9583e052be7ad52d870c62a207a2e887ba9b8aa", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py index 0a720ec995..9bac08fa34 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py @@ -13,7 +13,7 @@ class OVENIT2ITRetrieval(AbsTaskAny2AnyRetrieval): "path": "MRBench/mbeir_oven_task8", "revision": "350d14b7258189654e26a2be93dc0bd6bee09b76", }, - type="Retrieval", + type="Any2AnyRetrieval", category="it2it", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py index 2c171c778d..0877cfdf33 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py @@ -13,7 +13,7 @@ class OVENIT2TRetrieval(AbsTaskAny2AnyRetrieval): "path": "MRBench/mbeir_oven_task6", "revision": "2192074af29422bc1dc41cf07936f198b8c69bd0", }, - type="Retrieval", + type="Any2AnyRetrieval", category="it2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py index 39502a46d8..dc43e34e29 100644 --- 
a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py @@ -13,7 +13,7 @@ class ROxfordI2IRetrieval(AbsTaskAny2AnyRetrieval): "path": "JamieSJS/r-oxford", "revision": "d8daad98b4e4896a7f7fa1b3485a22420242d4fc", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py index 321bb818be..61cd189fce 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py @@ -13,7 +13,7 @@ class RP2kI2IRetrieval(AbsTaskAny2AnyRetrieval): "path": "JamieSJS/rp2k", "revision": "f8f82d4eb1aa4dc4dbf2c768596c8110a3703765", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py index a112ded273..258ec836c6 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py @@ -13,7 +13,7 @@ class RParisI2IRetrieval(AbsTaskAny2AnyRetrieval): "path": "JamieSJS/r-paris", "revision": "bafc3a08fdffd72558021ce3a41250833d7e0e88", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py index 09b33aac7d..5d754fe0e6 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py @@ -13,7 +13,7 @@ class SOPI2IRetrieval(AbsTaskAny2AnyRetrieval): "path": "JamieSJS/stanford-online-products", "revision": "0b3a1622902e6258425e673405bdfb1e5dfa8618", }, - 
type="Retrieval", + type="Any2AnyRetrieval", category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py index cc96d134a0..0f7acedab0 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py @@ -73,7 +73,7 @@ class SciMMIRI2TRetrieval(AbsTaskAny2AnyRetrieval): "revision": "eea276dc58c52eab33e9476acb137ff5530b78e9", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py index 41c2c98e79..987a00ea6d 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py @@ -73,7 +73,7 @@ class SciMMIRT2IRetrieval(AbsTaskAny2AnyRetrieval): "revision": "eea276dc58c52eab33e9476acb137ff5530b78e9", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py index c89091f41d..5a4b13ec94 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py @@ -13,7 +13,7 @@ class SketchyI2IRetrieval(AbsTaskAny2AnyRetrieval): "path": "JamieSJS/sketchy", "revision": "c8b8c1b7a2f0a92f1bfaaa1c9afc22aa42c61d5b", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/StanfordCarsI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/StanfordCarsI2IRetrieval.py index 
4a053f8cc0..e98633e899 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/StanfordCarsI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/StanfordCarsI2IRetrieval.py @@ -13,7 +13,7 @@ class StanfordCarsI2I(AbsTaskAny2AnyRetrieval): "path": "isaacchung/stanford_cars_retrieval", "revision": "b27a0612211af3598bd11fe28af20928f20cce06", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py index 754fa14911..fe1c2891db 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py @@ -14,7 +14,7 @@ class TUBerlinT2IRetrieval(AbsTaskAny2AnyRetrieval): "revision": "0cd78cd1ddbd3cafa9f319c638ebd77836ec9ff6", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py index cb5c3b38e5..58e1c5d31e 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py @@ -14,7 +14,7 @@ class VQA2IT2TRetrieval(AbsTaskAny2AnyRetrieval): "revision": "69882b6ba0b443dd62e633e546725b0f13b7e3aa", "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="it2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VidoreBenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VidoreBenchRetrieval.py index 6365b7adb3..fc73789541 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VidoreBenchRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VidoreBenchRetrieval.py @@ -100,7 +100,7 @@ class VidoreArxivQARetrieval(AbsTaskAny2AnyRetrieval): "path": 
"vidore/arxivqa_test_subsampled", "revision": "fe2b0e055eaac82d8f6801ebc8e85d8832248133", }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py index bf99c199a8..a36f5ea5fe 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py @@ -13,7 +13,7 @@ class VisualNewsI2TRetrieval(AbsTaskAny2AnyRetrieval): "path": "MRBench/mbeir_visualnews_task3", "revision": "aaee58895a66e4d619168849267ed2bb40d37043", }, - type="Retrieval", + type="Any2AnyRetrieval", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py index 8bd3f8278f..aae9882d52 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py @@ -13,7 +13,7 @@ class VisualNewsT2IRetrieval(AbsTaskAny2AnyRetrieval): "path": "MRBench/mbeir_visualnews_task0", "revision": "94c519d850dba2b0058c2fc9b5da6142a59aa285", }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py index 5565ca9f50..076f003b2b 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py @@ -14,7 +14,7 @@ class VizWizIT2TRetrieval(AbsTaskAny2AnyRetrieval): "revision": "044af162d55f82ab603fa16ffcf7f1e4dbf300e9", "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="it2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git 
a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py index b3f21869ed..fabbf48ed4 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py @@ -13,7 +13,7 @@ class WebQAT2ITRetrieval(AbsTaskAny2AnyRetrieval): "path": "MRBench/mbeir_webqa_task2", "revision": "53db4c9f9c93cb74926a1c9d04dea7d7acac2f21", }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2it", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py index f53415087e..a98ee514a9 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py @@ -13,7 +13,7 @@ class WebQAT2TRetrieval(AbsTaskAny2AnyRetrieval): "path": "MRBench/mbeir_webqa_task1", "revision": "468b42a2b2e767d80d2d93f5ae5d42f135a10478", }, - type="Retrieval", + type="Any2AnyRetrieval", category="s2p", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py index 5de06b937f..ee8b8c4148 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py @@ -101,7 +101,7 @@ class WITT2IRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval): "revision": "91ac153f1371a98b209ed763205e25e115ecd06e", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=_LANGUAGES, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py index 65c886f314..507639a4df 100644 --- 
a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py @@ -84,7 +84,7 @@ class XFlickr30kCoT2IRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval): "revision": "0af2c2eba58b27a71898787e286be04befdd7a20", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=_LANGUAGES, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py index 687c9f0446..7e78db8193 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py @@ -129,7 +129,7 @@ class XM3600T2IRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval): "revision": "8d3e5665526c55a5855cd6ddfbaba2032bc7cee4", # "trust_remote_code": True, }, - type="Retrieval", + type="Any2AnyRetrieval", category="t2i", eval_splits=["test"], eval_langs=_LANGUAGES, diff --git a/mteb/tasks/Image/Any2TextMultipleChoice/eng/CVBench.py b/mteb/tasks/Image/Any2TextMultipleChoice/eng/CVBench.py index e42ec28f76..bb3c5db181 100644 --- a/mteb/tasks/Image/Any2TextMultipleChoice/eng/CVBench.py +++ b/mteb/tasks/Image/Any2TextMultipleChoice/eng/CVBench.py @@ -36,10 +36,10 @@ class CVBenchCount(AbsTaskAny2TextMultipleChoice): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{wu2024scimmir, - title={placeholder}, - author={placeholder and others}, - journal={arXiv preprint arXiv:2401.13478}, + bibtex_citation="""@article{tong2024cambrian, + title={Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, + author={Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, + journal={arXiv 
preprint arXiv:2406.16860}, year={2024} }""", descriptive_stats={ @@ -96,10 +96,10 @@ class CVBenchRelation(AbsTaskAny2TextMultipleChoice): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{wu2024scimmir, - title={placeholder}, - author={placeholder and others}, - journal={arXiv preprint arXiv:2401.13478}, + bibtex_citation="""@article{tong2024cambrian, + title={Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, + author={Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, + journal={arXiv preprint arXiv:2406.16860}, year={2024} }""", descriptive_stats={ @@ -158,10 +158,10 @@ class CVBenchDepth(AbsTaskAny2TextMultipleChoice): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{wu2024scimmir, - title={placeholder}, - author={placeholder and others}, - journal={arXiv preprint arXiv:2401.13478}, + bibtex_citation="""@article{tong2024cambrian, + title={Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, + author={Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, + journal={arXiv preprint arXiv:2406.16860}, year={2024} }""", descriptive_stats={ diff --git a/mteb/tasks/Image/Clustering/eng/CIFAR.py b/mteb/tasks/Image/Clustering/eng/CIFAR.py index 61250cc3f5..e9a0429cf9 100644 --- a/mteb/tasks/Image/Clustering/eng/CIFAR.py +++ b/mteb/tasks/Image/Clustering/eng/CIFAR.py @@ -13,7 +13,7 @@ class CIFAR10Clustering(AbsTaskImageClustering): "path": "uoft-cs/cifar10", "revision": "0b2714987fa478483af9968de7c934580d0bb9a2", }, - type="Clustering", + type="ImageClustering", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git 
a/mteb/tasks/Image/Clustering/eng/ImageNet.py b/mteb/tasks/Image/Clustering/eng/ImageNet.py index daf8ab8dae..381b3d9d91 100644 --- a/mteb/tasks/Image/Clustering/eng/ImageNet.py +++ b/mteb/tasks/Image/Clustering/eng/ImageNet.py @@ -13,7 +13,7 @@ class ImageNetDog15Clustering(AbsTaskImageClustering): "path": "JamieSJS/imagenet-dog-15", "revision": "bfb6ad3b2109d26c9daddf14f98d315daa35ee72", }, - type="Clustering", + type="ImageClustering", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/Clustering/eng/TinyImageNet.py b/mteb/tasks/Image/Clustering/eng/TinyImageNet.py index 14123e2111..96c557e3ab 100644 --- a/mteb/tasks/Image/Clustering/eng/TinyImageNet.py +++ b/mteb/tasks/Image/Clustering/eng/TinyImageNet.py @@ -13,7 +13,7 @@ class TinyImageNet(AbsTaskImageClustering): "path": "zh-plus/tiny-imagenet", "revision": "5a77092c28e51558c5586e9c5eb71a7e17a5e43f", }, - type="Clustering", + type="ImageClustering", category="s2s", eval_splits=["valid"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py b/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py index f29259ae2a..cf9a18cc46 100644 --- a/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py @@ -13,7 +13,7 @@ class BirdsnapClassification(AbsTaskImageClassification): "path": "isaacchung/birdsnap", "revision": "e09b9dea248d579376684268cbedba28cd66b9b4", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/CIFAR.py b/mteb/tasks/Image/ImageClassification/eng/CIFAR.py index 2fe4fc2808..7560bff77e 100644 --- a/mteb/tasks/Image/ImageClassification/eng/CIFAR.py +++ b/mteb/tasks/Image/ImageClassification/eng/CIFAR.py @@ -13,7 +13,7 @@ class CIFAR10Classification(AbsTaskImageClassification): "path": 
"uoft-cs/cifar10", "revision": "0b2714987fa478483af9968de7c934580d0bb9a2", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py b/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py index 5c79a41046..0e00980428 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py @@ -14,7 +14,7 @@ class Caltech101Classification(AbsTaskImageClassification): "name": "with_background_category", "revision": "851374102055782c84f89b1b4e9d128a6568847b", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py b/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py index 5f34c09a14..7fcbd4b209 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py @@ -13,7 +13,7 @@ class Country211Classification(AbsTaskImageClassification): "path": "clip-benchmark/wds_country211", "revision": "1699f138f0558342a1cbf99f7cf36b4361bb5ebc", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py b/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py index aabb03f02a..6362785cb7 100644 --- a/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py @@ -13,7 +13,7 @@ class DTDClassification(AbsTaskImageClassification): "path": "tanganke/dtd", "revision": "d2afa97d9f335b1a6b3b09c637aef667f98f966e", }, - type="Classification", + type="ImageClassification", category="i2t", 
eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py b/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py index 6ef26a0dba..45638643de 100644 --- a/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py @@ -13,7 +13,7 @@ class EuroSATClassification(AbsTaskImageClassification): "path": "timm/eurosat-rgb", "revision": "b4e28552cd5f3932b6abc37eb20d3e84901ad728", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py b/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py index ea987fb4e2..49323aa4b6 100644 --- a/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py @@ -13,7 +13,7 @@ class FER2013Classification(AbsTaskImageClassification): "path": "clip-benchmark/wds_fer2013", "revision": "9399b94167523fe5c40b3a857e24ef931ee4395b", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py b/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py index 74659b5e92..3db3ef2d8c 100644 --- a/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py @@ -13,7 +13,7 @@ class FGVCAircraftClassification(AbsTaskImageClassification): "path": "HuggingFaceM4/FGVC-Aircraft", "revision": "91860adfc9a09aabca5cddb5247442109b38e213", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py 
b/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py index 34b2592e20..c508486997 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py @@ -13,7 +13,7 @@ class Food101Classification(AbsTaskImageClassification): "path": "ethz/food101", "revision": "e06acf2a88084f04bce4d4a525165d68e0a36c38", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["validation"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py b/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py index 6596151327..29c5ccc4c0 100644 --- a/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py @@ -13,7 +13,7 @@ class GTSRBClassification(AbsTaskImageClassification): "path": "clip-benchmark/wds_gtsrb", "revision": "1c13eff0803d2b02c9dc8dfe85e67770b3f0f3c5", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py b/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py index d3b8474808..c8bfd62ce8 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py +++ b/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py @@ -13,7 +13,7 @@ class Imagenet1kClassification(AbsTaskImageClassification): "path": "clip-benchmark/wds_imagenet1k", "revision": "b24c7a5a3ef12df09089055d1795e2ce7c7e7397", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py b/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py index 4ea68ddea3..5e9b9a86af 100644 --- a/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py +++ 
b/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py @@ -13,7 +13,7 @@ class MNISTClassification(AbsTaskImageClassification): "path": "ylecun/mnist", "revision": "b06aab39e05f7bcd9635d18ed25d06eae523c574", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py b/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py index d07badc30f..9e4a6f6aaa 100644 --- a/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py @@ -13,7 +13,7 @@ class OxfordFlowersClassification(AbsTaskImageClassification): "path": "nelorth/oxford-flowers", "revision": "a37b1891609c0376fa81eced756e7863e1bd873b", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py b/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py index 603dad1278..09935a2735 100644 --- a/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py @@ -13,7 +13,7 @@ class OxfordPetsClassification(AbsTaskImageClassification): "path": "isaacchung/OxfordPets", "revision": "557b480fae8d69247be74d9503b378a09425096f", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py b/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py index a6f9466672..0032266eaa 100644 --- a/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py @@ -13,7 +13,7 @@ class 
PatchCamelyonClassification(AbsTaskImageClassification): "path": "clip-benchmark/wds_vtab-pcam", "revision": "502695fe1a141108650e3c5b91c8b5e0ff84ed49", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py b/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py index c767e3b334..d73abc76b9 100644 --- a/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py @@ -13,7 +13,7 @@ class RESISC45Classification(AbsTaskImageClassification): "path": "timm/resisc45", "revision": "fe12fc5f1b7606543b0355eda392f1ddc54625c6", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py b/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py index 02593fe4e3..fe25c9d3d7 100644 --- a/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py @@ -13,7 +13,7 @@ class STL10Classification(AbsTaskImageClassification): "path": "tanganke/stl10", "revision": "49ae7f94508f7feae62baf836db284306eab0b0f", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py b/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py index d23844ec4f..f7593a3373 100644 --- a/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py @@ -13,7 +13,7 @@ class SUN397Classification(AbsTaskImageClassification): "path": "dpdl-benchmark/sun397", "revision": "7e6af6a2499ad708618be868e1471eac0aca1168", }, - type="Classification", + 
type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py b/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py index e4561b2165..4c049f540d 100644 --- a/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py @@ -13,7 +13,7 @@ class StanfordCarsClassification(AbsTaskImageClassification): "path": "isaacchung/StanfordCars", "revision": "09ffe9bc7864d3f1e851529e5c4b7e05601a04fb", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py b/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py index ef82d99d9e..41c923b538 100644 --- a/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py @@ -17,7 +17,7 @@ class UCF101Classification(AbsTaskImageClassification): "path": "flwrlabs/ucf101", "revision": "1098eed48f2929443f47c39f3b5c814e16369c11", }, - type="Classification", + type="ImageClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py b/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py index fa0628b351..1a02997aec 100644 --- a/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py +++ b/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py @@ -16,7 +16,7 @@ class VOC2007Classification(AbsTaskImageMultilabelClassification): "name": "voc2007_main", "revision": "dbafdb9e1506c9c419c5c4672e409463cd21ba50", }, - type="MultilabelClassification", + type="ImageMultilabelClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git 
a/mteb/tasks/Image/VisualSTS/en/STS12VisualSTS.py b/mteb/tasks/Image/VisualSTS/en/STS12VisualSTS.py index 8d78bb7238..c036b54042 100644 --- a/mteb/tasks/Image/VisualSTS/en/STS12VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/en/STS12VisualSTS.py @@ -13,7 +13,7 @@ class STS12VisualSTS(AbsTaskVisualSTS): }, description="SemEval-2012 Task 6." + "then rendered into images.", reference="https://arxiv.org/abs/2402.08183/", - type="STS", + type="VisualSTS", category="i2i", modalities=["image"], eval_splits=["test"], diff --git a/mteb/tasks/Image/VisualSTS/en/STS13VisualSTS.py b/mteb/tasks/Image/VisualSTS/en/STS13VisualSTS.py index 1b02248d35..cf4c0aa6c4 100644 --- a/mteb/tasks/Image/VisualSTS/en/STS13VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/en/STS13VisualSTS.py @@ -13,7 +13,7 @@ class STS13VisualSTS(AbsTaskVisualSTS): }, description="SemEval STS 2013 dataset." + "then rendered into images.", reference="https://arxiv.org/abs/2402.08183/", - type="STS", + type="VisualSTS", category="i2i", modalities=["image"], eval_splits=["test"], diff --git a/mteb/tasks/Image/VisualSTS/en/STS14VisualSTS.py b/mteb/tasks/Image/VisualSTS/en/STS14VisualSTS.py index a427fdae0b..46dce36f80 100644 --- a/mteb/tasks/Image/VisualSTS/en/STS14VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/en/STS14VisualSTS.py @@ -14,7 +14,7 @@ class STS14VisualSTS(AbsTaskVisualSTS): description="SemEval STS 2014 dataset. Currently only the English dataset." 
+ "rendered into images.", reference="https://arxiv.org/abs/2402.08183/", - type="STS", + type="VisualSTS", category="i2i", modalities=["image"], eval_splits=["test"], diff --git a/mteb/tasks/Image/VisualSTS/en/STS15VisualSTS.py b/mteb/tasks/Image/VisualSTS/en/STS15VisualSTS.py index 12c9a74c81..a9aca02c39 100644 --- a/mteb/tasks/Image/VisualSTS/en/STS15VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/en/STS15VisualSTS.py @@ -13,7 +13,7 @@ class STS15VisualSTS(AbsTaskVisualSTS): }, description="SemEval STS 2015 dataset" + "rendered into images.", reference="https://arxiv.org/abs/2402.08183/", - type="STS", + type="VisualSTS", category="i2i", modalities=["image"], eval_splits=["test"], diff --git a/mteb/tasks/Image/VisualSTS/en/STS16VisualSTS.py b/mteb/tasks/Image/VisualSTS/en/STS16VisualSTS.py index ae1e2900dd..b64e040282 100644 --- a/mteb/tasks/Image/VisualSTS/en/STS16VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/en/STS16VisualSTS.py @@ -13,7 +13,7 @@ class STS16VisualSTS(AbsTaskVisualSTS): }, description="SemEval STS 2016 dataset" + "rendered into images.", reference="https://arxiv.org/abs/2402.08183/", - type="STS", + type="VisualSTS", category="i2i", modalities=["image"], eval_splits=["test"], diff --git a/mteb/tasks/Image/VisualSTS/multilingual/STS17MultilingualVisualSTS.py b/mteb/tasks/Image/VisualSTS/multilingual/STS17MultilingualVisualSTS.py index dc9e464dcf..b72988676a 100644 --- a/mteb/tasks/Image/VisualSTS/multilingual/STS17MultilingualVisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/multilingual/STS17MultilingualVisualSTS.py @@ -33,7 +33,7 @@ class STS17MultilingualVisualSTS(AbsTaskVisualSTS, MultilingualTask): + "rendered into images." 
), reference="https://arxiv.org/abs/2402.08183/", - type="STS", + type="VisualSTS", category="i2i", modalities=["image"], eval_splits=_SPLITS, diff --git a/mteb/tasks/Image/VisualSTS/multilingual/STSBenchmarkMultilingualVisualSTS.py b/mteb/tasks/Image/VisualSTS/multilingual/STSBenchmarkMultilingualVisualSTS.py index 8cf063d059..339be27c37 100644 --- a/mteb/tasks/Image/VisualSTS/multilingual/STSBenchmarkMultilingualVisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/multilingual/STSBenchmarkMultilingualVisualSTS.py @@ -34,7 +34,7 @@ class STSBenchmarkMultilingualVisualSTS(AbsTaskVisualSTS, MultilingualTask): + "built upon multi-sts created by Philip May" ), reference="https://arxiv.org/abs/2402.08183/", - type="STS", + type="VisualSTS", category="i2i", modalities=["image"], eval_splits=_SPLITS, diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/Country211.py b/mteb/tasks/Image/ZeroshotClassification/eng/Country211.py index ce3e7657d8..0a60e33003 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/Country211.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/Country211.py @@ -17,7 +17,7 @@ class Country211Classification(AbsTaskZeroshotClassification): "path": "clip-benchmark/wds_country211", "revision": "1699f138f0558342a1cbf99f7cf36b4361bb5ebc", }, - type="Classification", + type="ZeroShotClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff --git a/mteb/tasks/Image/ZeroshotClassification/eng/Imagenet1k.py b/mteb/tasks/Image/ZeroshotClassification/eng/Imagenet1k.py index 6c96fad3ab..87dc8e277e 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/Imagenet1k.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/Imagenet1k.py @@ -17,7 +17,7 @@ class Imagenet1kClassification(AbsTaskZeroshotClassification): "path": "clip-benchmark/wds_imagenet1k", "revision": "b24c7a5a3ef12df09089055d1795e2ce7c7e7397", }, - type="Classification", + type="ZeroShotClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], diff 
--git a/mteb/tasks/Image/ZeroshotClassification/eng/UCF101.py b/mteb/tasks/Image/ZeroshotClassification/eng/UCF101.py index 9274e7c1f5..b95021184c 100644 --- a/mteb/tasks/Image/ZeroshotClassification/eng/UCF101.py +++ b/mteb/tasks/Image/ZeroshotClassification/eng/UCF101.py @@ -19,7 +19,7 @@ class UCF101Classification(AbsTaskZeroshotClassification): "path": "flwrlabs/ucf101", "revision": "1098eed48f2929443f47c39f3b5c814e16369c11", }, - type="Classification", + type="ZeroShotClassification", category="i2t", eval_splits=["test"], eval_langs=["eng-Latn"], From 3385584d19f6cff318771cba0035088168d5ccff Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 21 Oct 2024 14:34:06 +0100 Subject: [PATCH 60/73] update BLINKIT2T metadata --- mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py index c006d7240d..262927056b 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py @@ -40,7 +40,7 @@ class BLINKIT2TRetrieval(AbsTaskAny2AnyRetrieval): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 3080, + "num_documents": 26, "num_queries": 1073, "average_relevant_docs_per_query": 1, } From 3b93440c734b67cf93c1145c84167be917fae3f9 Mon Sep 17 00:00:00 2001 From: gowitheflow-1998 Date: Mon, 21 Oct 2024 14:51:01 +0100 Subject: [PATCH 61/73] fix wrong meta --- mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py | 4 +--- .../tasks/Image/ImageTextPairClassification/AROFlickrOrder.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py b/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py index bc9c50a3ba..c72ef004bd 100644 --- 
a/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py +++ b/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py @@ -34,11 +34,9 @@ class AROCocoOrder(AbsTaskImageTextPairClassification): "2022-01-01", "2022-12-31", ), # Estimated range for the collection of data - form=["written"], domains=["Encyclopaedic"], task_subtypes=["Caption Pairing"], - license="Not specified", - socioeconomic_status="mixed", + license="MIT", annotations_creators="expert-annotated", dialect=[], modalities=["text", "image"], diff --git a/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py b/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py index b423861f23..bd8ec152bf 100644 --- a/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py +++ b/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py @@ -34,11 +34,9 @@ class AROFlickrOrder(AbsTaskImageTextPairClassification): "2022-01-01", "2022-12-31", ), # Estimated range for the collection of data - form=["written"], domains=["Encyclopaedic"], task_subtypes=["Caption Pairing"], - license="Not specified", - socioeconomic_status="mixed", + license="MIT", annotations_creators="expert-annotated", dialect=[], modalities=["text", "image"], From 3998ccd0023fc5b9b5daa025748b6443b53bd54d Mon Sep 17 00:00:00 2001 From: gowitheflow-1998 Date: Mon, 21 Oct 2024 15:24:39 +0100 Subject: [PATCH 62/73] run mieb script --- scripts/run_mieb.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 scripts/run_mieb.py diff --git a/scripts/run_mieb.py b/scripts/run_mieb.py new file mode 100644 index 0000000000..ea5f19fe45 --- /dev/null +++ b/scripts/run_mieb.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import mteb + +for model_name in [ + "openai/clip-vit-base-patch32", + "openai/clip-vit-base-patch16", + "openai/clip-vit-large-patch14", + "royokong/e5-v", + "BAAI/bge-visualized-base", + "BAAI/bge-visualized-m3", + "google/siglip-so400m-patch14-384", + 
"kakaobrain/align-base", + "jinaai/jina-clip-v1", + "nomic-ai/nomic-embed-vision-v1.5", + "Salesforce/blip-image-captioning-large", + "Salesforce/blip-image-captioning-base", + "Salesforce/blip2-opt-2.7b", + "Salesforce/blip2-opt-6.7b-coco", + "facebook/dinov2-small", + "facebook/dinov2-base", + "facebook/dinov2-large", + "facebook/dinov2-giant", +]: + model = mteb.get_model(model_name) + tasks = mteb.get_tasks( + task_types=[ + "Any2AnyRetrieval", + "AbsTaskAny2AnyMultiChoice", + "Any2TextMutipleChoice", + "ImageClustering", + "ImageClassification", + "ImageMultilabelClassification", + "ImageTextPairClassification", + "VisualSTS", + "ZeroShotClassification", + ] + ) + evaluation = mteb.MTEB(tasks=tasks) + results = evaluation.run(model, output_folder="results-mieb-final") From a3649a73cb81716f68481e246fc7ca7099f1dfe4 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 21 Oct 2024 16:23:47 +0100 Subject: [PATCH 63/73] split ROxford, RParis into easy, medium and hard --- .../eng/ROxfordI2IRetrieval.py | 108 +++++++++- .../eng/RParisI2IRetrieval.py | 112 +++++++++-- .../ROxfordEasyI2IRetrieval.json | 186 ++++++++++++++++++ .../ROxfordHardI2IRetrieval.json | 186 ++++++++++++++++++ .../ROxfordI2IRetrieval.json | 186 ------------------ .../ROxfordMediumI2IRetrieval.json | 186 ++++++++++++++++++ .../RParisEasyI2IRetrieval.json | 186 ++++++++++++++++++ .../RParisHardI2IRetrieval.json | 186 ++++++++++++++++++ .../RParisI2IRetrieval.json | 186 ------------------ .../RParisMediumI2IRetrieval.json | 186 ++++++++++++++++++ 10 files changed, 1316 insertions(+), 392 deletions(-) create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordEasyI2IRetrieval.json create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordHardI2IRetrieval.json delete mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordI2IRetrieval.json create 
mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordMediumI2IRetrieval.json create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisEasyI2IRetrieval.json create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisHardI2IRetrieval.json delete mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisI2IRetrieval.json create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisMediumI2IRetrieval.json diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py index 39502a46d8..ad6837bc99 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py @@ -4,14 +4,14 @@ from mteb.abstasks.TaskMetadata import TaskMetadata -class ROxfordI2IRetrieval(AbsTaskAny2AnyRetrieval): +class ROxfordEasyI2IRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( - name="ROxfordI2IRetrieval", + name="ROxfordEasyI2IRetrieval", description="Retrieve photos of landmarks in Oxford, UK.", reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Oxford_and_CVPR_2018_paper.html", dataset={ - "path": "JamieSJS/r-oxford", - "revision": "d8daad98b4e4896a7f7fa1b3485a22420242d4fc", + "path": "JamieSJS/r-oxford-easy", + "revision": "3f018eb7ad32218a5a4ebd704493e0834a265cf5", }, type="Retrieval", category="i2i", @@ -35,16 +35,106 @@ class ROxfordI2IRetrieval(AbsTaskAny2AnyRetrieval): } """, descriptive_stats={ - "n_samples": {"test": 3555537}, + "n_samples": {"test": 70}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 5063, - "num_queries": 5063, - "average_relevant_docs_per_query": 702, + 
"num_documents": 4993, + "num_queries": 70, + "average_relevant_docs_per_query": 43.3, } }, }, ) - skip_first_result = True + skip_first_result = False + +class ROxfordMediumI2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="ROxfordMediumI2IRetrieval", + description="Retrieve photos of landmarks in Oxford, UK.", + reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Oxford_and_CVPR_2018_paper.html", + dataset={ + "path": "JamieSJS/r-oxford-medium", + "revision": "3bd28e9c45e15f299117c634799f7035c4de2d31", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="cv_recall_at_1", + date=("2009-01-01", "2010-04-01"), + domains=["Web"], + task_subtypes=["Object recognition"], + license="Not specified", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@inproceedings{radenovic2018revisiting, + title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5706--5715}, + year={2018} +} + """, + descriptive_stats={ + "n_samples": {"test": 70}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 4993, + "num_queries": 70, + "average_relevant_docs_per_query": 78.9, + } + }, + }, + ) + skip_first_result = False + +class ROxfordHardI2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="ROxfordHardI2IRetrieval", + description="Retrieve photos of landmarks in Oxford, UK.", + reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Oxford_and_CVPR_2018_paper.html", + dataset={ + "path": "JamieSJS/r-oxford-hard", + "revision": 
"f20b30211b7ba3fc64a02bd83998fe75f3023719", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="cv_recall_at_1", + date=("2009-01-01", "2010-04-01"), + domains=["Web"], + task_subtypes=["Object recognition"], + license="Not specified", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@inproceedings{radenovic2018revisiting, + title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5706--5715}, + year={2018} +} + """, + descriptive_stats={ + "n_samples": {"test": 70}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 4993, + "num_queries": 70, + "average_relevant_docs_per_query": 35.7, + } + }, + }, + ) + skip_first_result = False \ No newline at end of file diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py index a112ded273..e5687ce125 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py @@ -4,14 +4,14 @@ from mteb.abstasks.TaskMetadata import TaskMetadata -class RParisI2IRetrieval(AbsTaskAny2AnyRetrieval): +class RParisEasyI2IRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( - name="RParisI2IRetrieval", - description="Retrieve photos of landmarks in Paris.", - reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Oxford_and_CVPR_2018_paper.html", + name="RParisEasyI2IRetrieval", + description="Retrieve photos of landmarks in Paris, France.", + 
reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Paris_and_CVPR_2018_paper.html", dataset={ - "path": "JamieSJS/r-paris", - "revision": "bafc3a08fdffd72558021ce3a41250833d7e0e88", + "path": "JamieSJS/r-paris-easy", + "revision": "a7293da8a341de665ee4dcb2f209281df342d80b", }, type="Retrieval", category="i2i", @@ -35,16 +35,106 @@ class RParisI2IRetrieval(AbsTaskAny2AnyRetrieval): } """, descriptive_stats={ - "n_samples": {"test": 6392}, + "n_samples": {"test": 70}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 6392, - "num_queries": 6392, - "average_relevant_docs_per_query": 734, + "num_documents": 6322, + "num_queries": 70, + "average_relevant_docs_per_query": 98.2, } }, }, ) - skip_first_result = True + skip_first_result = False + +class RParisMediumI2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="RParisMediumI2IRetrieval", + description="Retrieve photos of landmarks in Paris, France.", + reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Paris_and_CVPR_2018_paper.html", + dataset={ + "path": "JamieSJS/r-paris-medium", + "revision": "900267b49003a086979e8d52f6942624236bfc34", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="cv_recall_at_1", + date=("2009-01-01", "2010-04-01"), + domains=["Web"], + task_subtypes=["Object recognition"], + license="Not specified", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@inproceedings{radenovic2018revisiting, + title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5706--5715}, + year={2018} +} + 
""", + descriptive_stats={ + "n_samples": {"test": 70}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 6322, + "num_queries": 70, + "average_relevant_docs_per_query": 147.9, + } + }, + }, + ) + skip_first_result = False + +class RParisHardI2IRetrieval(AbsTaskAny2AnyRetrieval): + metadata = TaskMetadata( + name="RParisHardI2IRetrieval", + description="Retrieve photos of landmarks in Paris, France.", + reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Paris_and_CVPR_2018_paper.html", + dataset={ + "path": "JamieSJS/r-paris-hard", + "revision": "fd121b6592fe946616fa85116703b94a4c61fd63", + }, + type="Retrieval", + category="i2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="cv_recall_at_1", + date=("2009-01-01", "2010-04-01"), + domains=["Web"], + task_subtypes=["Object recognition"], + license="Not specified", + annotations_creators="derived", + dialect=[], + modalities=["image"], + sample_creation="created", + bibtex_citation="""@inproceedings{radenovic2018revisiting, + title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5706--5715}, + year={2018} +} + """, + descriptive_stats={ + "n_samples": {"test": 70}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 6322, + "num_queries": 70, + "average_relevant_docs_per_query": 35.7, + } + }, + }, + ) + skip_first_result = False \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordEasyI2IRetrieval.json 
b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordEasyI2IRetrieval.json new file mode 100644 index 0000000000..319e3d389b --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordEasyI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "3f018eb7ad32218a5a4ebd704493e0834a265cf5", + "evaluation_time": 17.977893829345703, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.73529, + "cv_recall_at_10": 0.88235, + "cv_recall_at_100": 0.95588, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 0.89706, + "cv_recall_at_3": 0.79412, + "cv_recall_at_5": 0.82353, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.73529, + "map_at_1": 0.12315, + "map_at_10": 0.25156, + "map_at_100": 0.39829, + "map_at_1000": 0.46268, + "map_at_20": 0.30547, + "map_at_3": 0.16397, + "map_at_5": 0.20244, + "mrr_at_1": 0.7352941176470589, + "mrr_at_10": 0.7796977124183007, + "mrr_at_100": 0.782038983404294, + "mrr_at_1000": 0.7822887502278202, + "mrr_at_20": 0.7806168300653595, + "mrr_at_3": 0.7647058823529411, + "mrr_at_5": 0.7713235294117647, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": 0.43205924601836393, + "nauc_cv_recall_at_100_max": 0.46045430774676144, + "nauc_cv_recall_at_100_std": 0.092928936942703, + "nauc_cv_recall_at_10_diff1": 0.5238482274120124, + "nauc_cv_recall_at_10_max": 0.5447795999610322, + "nauc_cv_recall_at_10_std": -0.06773626862669024, + "nauc_cv_recall_at_1_diff1": 0.12227878218725667, + "nauc_cv_recall_at_1_max": 0.6674736854281768, + "nauc_cv_recall_at_1_std": 0.14078948712021733, + "nauc_cv_recall_at_20_diff1": 0.5375955313486179, + "nauc_cv_recall_at_20_max": 0.46954147068219054, + "nauc_cv_recall_at_20_std": -0.09380403989753433, + "nauc_cv_recall_at_3_diff1": 0.032732730804488214, 
+ "nauc_cv_recall_at_3_max": 0.6199038499058639, + "nauc_cv_recall_at_3_std": 0.1529241887407745, + "nauc_cv_recall_at_5_diff1": 0.05658962411663845, + "nauc_cv_recall_at_5_max": 0.5877996333799903, + "nauc_cv_recall_at_5_std": 0.07730142560650717, + "nauc_map_at_1000_diff1": -0.11043090289526158, + "nauc_map_at_1000_max": 0.3448001394797035, + "nauc_map_at_1000_std": 0.27617723792125437, + "nauc_map_at_100_diff1": -0.01003127759400255, + "nauc_map_at_100_max": 0.1251706076455483, + "nauc_map_at_100_std": 0.03701229955981713, + "nauc_map_at_10_diff1": 0.2139884419568207, + "nauc_map_at_10_max": -0.26177042855989124, + "nauc_map_at_10_std": -0.26537536804238954, + "nauc_map_at_1_diff1": 0.21399681007835022, + "nauc_map_at_1_max": -0.3465990811601411, + "nauc_map_at_1_std": -0.29206561782006085, + "nauc_map_at_20_diff1": 0.1442752699133972, + "nauc_map_at_20_max": -0.15930665035054287, + "nauc_map_at_20_std": -0.21656927646466867, + "nauc_map_at_3_diff1": 0.23213875592486613, + "nauc_map_at_3_max": -0.3123546266529488, + "nauc_map_at_3_std": -0.28815041773010014, + "nauc_map_at_5_diff1": 0.2555448109113392, + "nauc_map_at_5_max": -0.29279690087015, + "nauc_map_at_5_std": -0.2774467752271305, + "nauc_mrr_at_1000_diff1": 0.12342303172342535, + "nauc_mrr_at_1000_max": 0.6362159972261828, + "nauc_mrr_at_1000_std": 0.12097023869090642, + "nauc_mrr_at_100_diff1": 0.12414954187540776, + "nauc_mrr_at_100_max": 0.6357802506977609, + "nauc_mrr_at_100_std": 0.12039621076657632, + "nauc_mrr_at_10_diff1": 0.12909230332110294, + "nauc_mrr_at_10_max": 0.6368549836172475, + "nauc_mrr_at_10_std": 0.11895636170173347, + "nauc_mrr_at_1_diff1": 0.12227878218725667, + "nauc_mrr_at_1_max": 0.6674736854281768, + "nauc_mrr_at_1_std": 0.14078948712021733, + "nauc_mrr_at_20_diff1": 0.1279569336937743, + "nauc_mrr_at_20_max": 0.6347997662966113, + "nauc_mrr_at_20_std": 0.11886311296272292, + "nauc_mrr_at_3_diff1": 0.0819493669888214, + "nauc_mrr_at_3_max": 0.6460493610936507, + 
"nauc_mrr_at_3_std": 0.14625466858168346, + "nauc_mrr_at_5_diff1": 0.08926776001340989, + "nauc_mrr_at_5_max": 0.6409743370271209, + "nauc_mrr_at_5_std": 0.13086158199344464, + "nauc_ndcg_at_1000_diff1": 0.0019895005695910535, + "nauc_ndcg_at_1000_max": 0.45479785106891296, + "nauc_ndcg_at_1000_std": 0.24337684988960884, + "nauc_ndcg_at_100_diff1": -0.09459953169491665, + "nauc_ndcg_at_100_max": 0.3603949417192614, + "nauc_ndcg_at_100_std": 0.25977974826865124, + "nauc_ndcg_at_10_diff1": -0.13743096053058393, + "nauc_ndcg_at_10_max": 0.5086455404107587, + "nauc_ndcg_at_10_std": 0.2942246989065215, + "nauc_ndcg_at_1_diff1": 0.12227878218725667, + "nauc_ndcg_at_1_max": 0.6674736854281768, + "nauc_ndcg_at_1_std": 0.14078948712021733, + "nauc_ndcg_at_20_diff1": -0.12980804316788766, + "nauc_ndcg_at_20_max": 0.4455786946909776, + "nauc_ndcg_at_20_std": 0.31146671038942925, + "nauc_ndcg_at_3_diff1": -0.12622874187697183, + "nauc_ndcg_at_3_max": 0.6321053376737653, + "nauc_ndcg_at_3_std": 0.2970450461706155, + "nauc_ndcg_at_5_diff1": -0.1167444253887786, + "nauc_ndcg_at_5_max": 0.5830509606014207, + "nauc_ndcg_at_5_std": 0.3092017793979728, + "nauc_precision_at_1000_diff1": -0.20870904079910094, + "nauc_precision_at_1000_max": 0.553838513916762, + "nauc_precision_at_1000_std": 0.5530198613029336, + "nauc_precision_at_100_diff1": -0.35505246743099134, + "nauc_precision_at_100_max": 0.7356593773083951, + "nauc_precision_at_100_std": 0.6881226249275548, + "nauc_precision_at_10_diff1": -0.32094129231704527, + "nauc_precision_at_10_max": 0.6957334058235605, + "nauc_precision_at_10_std": 0.4986922302670923, + "nauc_precision_at_1_diff1": 0.12227878218725667, + "nauc_precision_at_1_max": 0.6674736854281768, + "nauc_precision_at_1_std": 0.14078948712021733, + "nauc_precision_at_20_diff1": -0.348557232862726, + "nauc_precision_at_20_max": 0.7349093491157456, + "nauc_precision_at_20_std": 0.5403176104598045, + "nauc_precision_at_3_diff1": -0.19598105078336095, + 
"nauc_precision_at_3_max": 0.6900728385079788, + "nauc_precision_at_3_std": 0.38284792684568514, + "nauc_precision_at_5_diff1": -0.24325694519449578, + "nauc_precision_at_5_max": 0.6894112316594818, + "nauc_precision_at_5_std": 0.43885055756284386, + "nauc_recall_at_1000_diff1": 0.1247114496786093, + "nauc_recall_at_1000_max": 0.1972100012754324, + "nauc_recall_at_1000_std": 0.45358985850344147, + "nauc_recall_at_100_diff1": -0.007219441799189107, + "nauc_recall_at_100_max": -0.2726450323976995, + "nauc_recall_at_100_std": -0.09774220148121496, + "nauc_recall_at_10_diff1": 0.2493200864593003, + "nauc_recall_at_10_max": -0.35624261159143006, + "nauc_recall_at_10_std": -0.3178591018768707, + "nauc_recall_at_1_diff1": 0.21399681007835022, + "nauc_recall_at_1_max": -0.3465990811601411, + "nauc_recall_at_1_std": -0.29206561782006085, + "nauc_recall_at_20_diff1": 0.17625014384578389, + "nauc_recall_at_20_max": -0.3372766383490987, + "nauc_recall_at_20_std": -0.30264233629183895, + "nauc_recall_at_3_diff1": 0.23996176062420918, + "nauc_recall_at_3_max": -0.3204556669767577, + "nauc_recall_at_3_std": -0.292832765895384, + "nauc_recall_at_5_diff1": 0.28900424501627087, + "nauc_recall_at_5_max": -0.31760291427784704, + "nauc_recall_at_5_std": -0.2855625586155051, + "ndcg_at_1": 0.73529, + "ndcg_at_10": 0.60178, + "ndcg_at_100": 0.61347, + "ndcg_at_1000": 0.69898, + "ndcg_at_20": 0.58929, + "ndcg_at_3": 0.62141, + "ndcg_at_5": 0.61571, + "precision_at_1": 0.73529, + "precision_at_10": 0.45, + "precision_at_100": 0.19691, + "precision_at_1000": 0.04228, + "precision_at_20": 0.37647, + "precision_at_3": 0.54412, + "precision_at_5": 0.51176, + "recall_at_1": 0.12315, + "recall_at_10": 0.32137, + "recall_at_100": 0.68206, + "recall_at_1000": 0.93425, + "recall_at_20": 0.43253, + "recall_at_3": 0.16748, + "recall_at_5": 0.22328 + } + ] + }, + "task_name": "ROxfordEasyI2IRetrieval" +} \ No newline at end of file diff --git 
a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordHardI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordHardI2IRetrieval.json new file mode 100644 index 0000000000..f957d7315e --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordHardI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "f20b30211b7ba3fc64a02bd83998fe75f3023719", + "evaluation_time": 17.18506622314453, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.04286, + "cv_recall_at_10": 0.44286, + "cv_recall_at_100": 0.77143, + "cv_recall_at_1000": 0.92857, + "cv_recall_at_20": 0.52857, + "cv_recall_at_3": 0.18571, + "cv_recall_at_5": 0.27143, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.04286, + "map_at_1": 0.00178, + "map_at_10": 0.02316, + "map_at_100": 0.03694, + "map_at_1000": 0.05036, + "map_at_20": 0.02814, + "map_at_3": 0.01285, + "map_at_5": 0.01752, + "mrr_at_1": 0.04285714285714286, + "mrr_at_10": 0.13929705215419497, + "mrr_at_100": 0.15254576393638014, + "mrr_at_1000": 0.15348328701607364, + "mrr_at_20": 0.14564255189255187, + "mrr_at_3": 0.1, + "mrr_at_5": 0.11857142857142858, + "nauc_cv_recall_at_1000_diff1": 0.47861811391223064, + "nauc_cv_recall_at_1000_max": 0.5808590102707742, + "nauc_cv_recall_at_1000_std": 0.09140989729224985, + "nauc_cv_recall_at_100_diff1": 0.5586401902191371, + "nauc_cv_recall_at_100_max": -0.12197159565580537, + "nauc_cv_recall_at_100_std": -0.46324143692564745, + "nauc_cv_recall_at_10_diff1": 0.33702203892735166, + "nauc_cv_recall_at_10_max": -0.2566107451434116, + "nauc_cv_recall_at_10_std": -0.2562935040214977, + "nauc_cv_recall_at_1_diff1": 0.5001341561577676, + "nauc_cv_recall_at_1_max": -0.12592791342455964, + "nauc_cv_recall_at_1_std": 0.13701815580001792, + "nauc_cv_recall_at_20_diff1": 
0.33340522734848327, + "nauc_cv_recall_at_20_max": -0.27021038803160713, + "nauc_cv_recall_at_20_std": -0.2604850885276762, + "nauc_cv_recall_at_3_diff1": 0.42356508767210377, + "nauc_cv_recall_at_3_max": -0.30471252842923874, + "nauc_cv_recall_at_3_std": -0.2722848548580372, + "nauc_cv_recall_at_5_diff1": 0.37677403680683497, + "nauc_cv_recall_at_5_max": -0.30530641993804797, + "nauc_cv_recall_at_5_std": -0.2763347032980382, + "nauc_map_at_1000_diff1": 0.32492788395298233, + "nauc_map_at_1000_max": -0.264513294885872, + "nauc_map_at_1000_std": -0.06025413474178467, + "nauc_map_at_100_diff1": 0.33932455574204345, + "nauc_map_at_100_max": -0.3183944729391321, + "nauc_map_at_100_std": -0.11067793875024108, + "nauc_map_at_10_diff1": 0.3627322367418886, + "nauc_map_at_10_max": -0.270787009945954, + "nauc_map_at_10_std": -0.12449041958590679, + "nauc_map_at_1_diff1": 0.8349360037495761, + "nauc_map_at_1_max": 0.04030601005296298, + "nauc_map_at_1_std": 0.28342764489523986, + "nauc_map_at_20_diff1": 0.36471735689250456, + "nauc_map_at_20_max": -0.27443485632176895, + "nauc_map_at_20_std": -0.10351956211548237, + "nauc_map_at_3_diff1": 0.33740985231621823, + "nauc_map_at_3_max": -0.26557891525076527, + "nauc_map_at_3_std": -0.13603337759846046, + "nauc_map_at_5_diff1": 0.3822623605794258, + "nauc_map_at_5_max": -0.26535510732907475, + "nauc_map_at_5_std": -0.15188815608303466, + "nauc_mrr_at_1000_diff1": 0.42803033998914947, + "nauc_mrr_at_1000_max": -0.25330248029839103, + "nauc_mrr_at_1000_std": -0.1498777504287341, + "nauc_mrr_at_100_diff1": 0.428756525260202, + "nauc_mrr_at_100_max": -0.255106401697983, + "nauc_mrr_at_100_std": -0.15247667584774333, + "nauc_mrr_at_10_diff1": 0.4269277134986295, + "nauc_mrr_at_10_max": -0.26024547551332144, + "nauc_mrr_at_10_std": -0.15027887241014917, + "nauc_mrr_at_1_diff1": 0.5001341561577676, + "nauc_mrr_at_1_max": -0.12592791342455964, + "nauc_mrr_at_1_std": 0.13701815580001792, + "nauc_mrr_at_20_diff1": 0.4273548864254061, + 
"nauc_mrr_at_20_max": -0.2649857851418372, + "nauc_mrr_at_20_std": -0.150811901899745, + "nauc_mrr_at_3_diff1": 0.46234460520174797, + "nauc_mrr_at_3_max": -0.26846705418134004, + "nauc_mrr_at_3_std": -0.14951136379707816, + "nauc_mrr_at_5_diff1": 0.4315603993226756, + "nauc_mrr_at_5_max": -0.2751708043421038, + "nauc_mrr_at_5_std": -0.15676414005890574, + "nauc_ndcg_at_1000_diff1": 0.3031539661086623, + "nauc_ndcg_at_1000_max": -0.06324949257415059, + "nauc_ndcg_at_1000_std": 0.03291344536352667, + "nauc_ndcg_at_100_diff1": 0.3537878100611368, + "nauc_ndcg_at_100_max": -0.3440256642252378, + "nauc_ndcg_at_100_std": -0.13928638317122793, + "nauc_ndcg_at_10_diff1": 0.31423572757944224, + "nauc_ndcg_at_10_max": -0.27799943432453955, + "nauc_ndcg_at_10_std": -0.12000726018713354, + "nauc_ndcg_at_1_diff1": 0.5001341561577676, + "nauc_ndcg_at_1_max": -0.12592791342455964, + "nauc_ndcg_at_1_std": 0.13701815580001792, + "nauc_ndcg_at_20_diff1": 0.3527561180413498, + "nauc_ndcg_at_20_max": -0.29516361937414604, + "nauc_ndcg_at_20_std": -0.10951825882002067, + "nauc_ndcg_at_3_diff1": 0.3265974284434805, + "nauc_ndcg_at_3_max": -0.27320998825572146, + "nauc_ndcg_at_3_std": -0.12237689567430368, + "nauc_ndcg_at_5_diff1": 0.32240119227390085, + "nauc_ndcg_at_5_max": -0.30077766985994786, + "nauc_ndcg_at_5_std": -0.15147497522509035, + "nauc_precision_at_1000_diff1": -0.12639282009702774, + "nauc_precision_at_1000_max": 0.22826737103796205, + "nauc_precision_at_1000_std": 0.32628514757600596, + "nauc_precision_at_100_diff1": 0.2772154726137188, + "nauc_precision_at_100_max": -0.23028476826992986, + "nauc_precision_at_100_std": -0.1303180540104937, + "nauc_precision_at_10_diff1": 0.29107138713345343, + "nauc_precision_at_10_max": -0.30600784337992426, + "nauc_precision_at_10_std": -0.12021874830794935, + "nauc_precision_at_1_diff1": 0.5001341561577676, + "nauc_precision_at_1_max": -0.12592791342455964, + "nauc_precision_at_1_std": 0.13701815580001792, + 
"nauc_precision_at_20_diff1": 0.27727122281419164, + "nauc_precision_at_20_max": -0.3127822679663893, + "nauc_precision_at_20_std": -0.07951548670204237, + "nauc_precision_at_3_diff1": 0.3064711218627153, + "nauc_precision_at_3_max": -0.29553472432214506, + "nauc_precision_at_3_std": -0.17296643483519816, + "nauc_precision_at_5_diff1": 0.27289717372431765, + "nauc_precision_at_5_max": -0.33688627375325547, + "nauc_precision_at_5_std": -0.1943667406192727, + "nauc_recall_at_1000_diff1": 0.2612937021120264, + "nauc_recall_at_1000_max": -0.04177364682142833, + "nauc_recall_at_1000_std": -0.10225779216767208, + "nauc_recall_at_100_diff1": 0.23045925470740378, + "nauc_recall_at_100_max": -0.32266175361589855, + "nauc_recall_at_100_std": -0.10122639227831537, + "nauc_recall_at_10_diff1": 0.3214491777002914, + "nauc_recall_at_10_max": -0.2191819309348708, + "nauc_recall_at_10_std": -0.13294514318629588, + "nauc_recall_at_1_diff1": 0.8349360037495761, + "nauc_recall_at_1_max": 0.04030601005296298, + "nauc_recall_at_1_std": 0.28342764489523986, + "nauc_recall_at_20_diff1": 0.30304432643409973, + "nauc_recall_at_20_max": -0.2369864752715589, + "nauc_recall_at_20_std": -0.11319354561879752, + "nauc_recall_at_3_diff1": 0.2610862099372524, + "nauc_recall_at_3_max": -0.26808957441898906, + "nauc_recall_at_3_std": -0.1754514836876096, + "nauc_recall_at_5_diff1": 0.3659777743786138, + "nauc_recall_at_5_max": -0.23557478979416202, + "nauc_recall_at_5_std": -0.1749293090721626, + "ndcg_at_1": 0.04286, + "ndcg_at_10": 0.08519, + "ndcg_at_100": 0.12707, + "ndcg_at_1000": 0.26609, + "ndcg_at_20": 0.08785, + "ndcg_at_3": 0.07857, + "ndcg_at_5": 0.08201, + "precision_at_1": 0.04286, + "precision_at_10": 0.07571, + "precision_at_100": 0.03571, + "precision_at_1000": 0.01857, + "precision_at_20": 0.05714, + "precision_at_3": 0.08571, + "precision_at_5": 0.08, + "recall_at_1": 0.00178, + "recall_at_10": 0.07379, + "recall_at_100": 0.22471, + "recall_at_1000": 0.60888, + "recall_at_20": 
0.09447, + "recall_at_3": 0.02931, + "recall_at_5": 0.04839 + } + ] + }, + "task_name": "ROxfordHardI2IRetrieval" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordI2IRetrieval.json deleted file mode 100644 index e47a943219..0000000000 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordI2IRetrieval.json +++ /dev/null @@ -1,186 +0,0 @@ -{ - "dataset_revision": "d8daad98b4e4896a7f7fa1b3485a22420242d4fc", - "evaluation_time": 29.71091365814209, - "kg_co2_emissions": null, - "mteb_version": "1.12.90", - "scores": { - "test": [ - { - "cv_recall_at_1": 0.80946, - "cv_recall_at_10": 0.99953, - "cv_recall_at_100": 1.0, - "cv_recall_at_1000": 1.0, - "cv_recall_at_20": 1.0, - "cv_recall_at_3": 0.97636, - "cv_recall_at_5": 0.99291, - "hf_subset": "default", - "languages": [ - "eng-Latn" - ], - "main_score": 0.80946, - "map_at_1": 0.00083, - "map_at_10": 0.00583, - "map_at_100": 0.04516, - "map_at_1000": 0.27358, - "map_at_20": 0.01067, - "map_at_3": 0.00211, - "map_at_5": 0.00324, - "mrr_at_1": 0.9985815602836879, - "mrr_at_10": 0.9992119779353821, - "mrr_at_100": 0.9992119779353821, - "mrr_at_1000": 0.9992119779353821, - "mrr_at_20": 0.9992119779353821, - "mrr_at_3": 0.9992119779353821, - "mrr_at_5": 0.9992119779353821, - "nauc_cv_recall_at_1000_diff1": NaN, - "nauc_cv_recall_at_1000_max": NaN, - "nauc_cv_recall_at_1000_std": NaN, - "nauc_cv_recall_at_100_diff1": NaN, - "nauc_cv_recall_at_100_max": NaN, - "nauc_cv_recall_at_100_std": NaN, - "nauc_cv_recall_at_10_diff1": 0.5540638703677596, - "nauc_cv_recall_at_10_max": 0.7221878767269868, - "nauc_cv_recall_at_10_std": 0.869246700256431, - "nauc_cv_recall_at_1_diff1": -0.24158864035258856, - "nauc_cv_recall_at_1_max": -0.0420448021592233, - "nauc_cv_recall_at_1_std": 0.04484895202286055, 
- "nauc_cv_recall_at_20_diff1": NaN, - "nauc_cv_recall_at_20_max": NaN, - "nauc_cv_recall_at_20_std": NaN, - "nauc_cv_recall_at_3_diff1": -0.2813966714480976, - "nauc_cv_recall_at_3_max": -0.04571848249527183, - "nauc_cv_recall_at_3_std": 0.06606763479339506, - "nauc_cv_recall_at_5_diff1": -0.057737057928295976, - "nauc_cv_recall_at_5_max": 0.017855647720182007, - "nauc_cv_recall_at_5_std": 0.08427153819844212, - "nauc_map_at_1000_diff1": -0.3857995513654731, - "nauc_map_at_1000_max": 0.03808456962512866, - "nauc_map_at_1000_std": -0.06663520492889344, - "nauc_map_at_100_diff1": -0.34437629024420324, - "nauc_map_at_100_max": 0.020553049295469906, - "nauc_map_at_100_std": 0.0556790533768489, - "nauc_map_at_10_diff1": -0.18993364966776907, - "nauc_map_at_10_max": 0.006866799821313959, - "nauc_map_at_10_std": 0.13195237855420855, - "nauc_map_at_1_diff1": -0.05431107143472811, - "nauc_map_at_1_max": 0.016884860597154504, - "nauc_map_at_1_std": 0.11466554606849116, - "nauc_map_at_20_diff1": -0.2414863261116838, - "nauc_map_at_20_max": 0.008935096908909975, - "nauc_map_at_20_std": 0.11557906533166638, - "nauc_map_at_3_diff1": -0.12217591294530498, - "nauc_map_at_3_max": 0.008152934040333987, - "nauc_map_at_3_std": 0.13797559913063503, - "nauc_map_at_5_diff1": -0.13888462292383946, - "nauc_map_at_5_max": 0.007634414930511178, - "nauc_map_at_5_std": 0.13391202111166425, - "nauc_mrr_at_1000_diff1": 0.9476986801026335, - "nauc_mrr_at_1000_max": 1.0, - "nauc_mrr_at_1000_std": 0.9166563630181881, - "nauc_mrr_at_100_diff1": 0.9476986801026335, - "nauc_mrr_at_100_max": 1.0, - "nauc_mrr_at_100_std": 0.9166563630181881, - "nauc_mrr_at_10_diff1": 0.9476986801026335, - "nauc_mrr_at_10_max": 1.0, - "nauc_mrr_at_10_std": 0.9166563630181881, - "nauc_mrr_at_1_diff1": 0.9564155667521437, - "nauc_mrr_at_1_max": 1.0, - "nauc_mrr_at_1_std": 0.9073959589089956, - "nauc_mrr_at_20_diff1": 0.9476986801026335, - "nauc_mrr_at_20_max": 1.0, - "nauc_mrr_at_20_std": 0.9166563630181881, - 
"nauc_mrr_at_3_diff1": 0.9476986801026335, - "nauc_mrr_at_3_max": 1.0, - "nauc_mrr_at_3_std": 0.9166563630181881, - "nauc_mrr_at_5_diff1": 0.9476986801026335, - "nauc_mrr_at_5_max": 1.0, - "nauc_mrr_at_5_std": 0.9166563630181881, - "nauc_ndcg_at_1000_diff1": -0.29077931122633116, - "nauc_ndcg_at_1000_max": 0.014960510380277934, - "nauc_ndcg_at_1000_std": -0.12483981180901933, - "nauc_ndcg_at_100_diff1": -0.3609816949819758, - "nauc_ndcg_at_100_max": 0.021419925854651446, - "nauc_ndcg_at_100_std": -0.17265752707444715, - "nauc_ndcg_at_10_diff1": -0.27457750496652206, - "nauc_ndcg_at_10_max": 0.0008466224260104365, - "nauc_ndcg_at_10_std": -0.045841557226464144, - "nauc_ndcg_at_1_diff1": 0.9564155667521437, - "nauc_ndcg_at_1_max": 1.0, - "nauc_ndcg_at_1_std": 0.9073959589089956, - "nauc_ndcg_at_20_diff1": -0.3094417772089175, - "nauc_ndcg_at_20_max": 0.009714640004268442, - "nauc_ndcg_at_20_std": -0.08586776230182966, - "nauc_ndcg_at_3_diff1": -0.23757585938494089, - "nauc_ndcg_at_3_max": -0.016387555353346198, - "nauc_ndcg_at_3_std": 0.030386940725562046, - "nauc_ndcg_at_5_diff1": -0.23196460003691158, - "nauc_ndcg_at_5_max": -0.013062943721282708, - "nauc_ndcg_at_5_std": -0.010500172553175749, - "nauc_precision_at_1000_diff1": -0.17619733662463302, - "nauc_precision_at_1000_max": 0.012339801300641195, - "nauc_precision_at_1000_std": -0.1876530479926194, - "nauc_precision_at_100_diff1": -0.3626475300981416, - "nauc_precision_at_100_max": 0.025040872201569076, - "nauc_precision_at_100_std": -0.18897442356689456, - "nauc_precision_at_10_diff1": -0.2782683949086005, - "nauc_precision_at_10_max": 0.006150543325428016, - "nauc_precision_at_10_std": -0.05974378846400665, - "nauc_precision_at_1_diff1": 0.9564155667521437, - "nauc_precision_at_1_max": 1.0, - "nauc_precision_at_1_std": 0.9073959589089956, - "nauc_precision_at_20_diff1": -0.31218967700544853, - "nauc_precision_at_20_max": 0.015101101880288523, - "nauc_precision_at_20_std": -0.10187042490803115, - 
"nauc_precision_at_3_diff1": -0.2418495400445544, - "nauc_precision_at_3_max": -0.017073069963463458, - "nauc_precision_at_3_std": 0.024904433150907852, - "nauc_precision_at_5_diff1": -0.23061542055201162, - "nauc_precision_at_5_max": -0.013293477378375076, - "nauc_precision_at_5_std": -0.02217796284197703, - "nauc_recall_at_1000_diff1": -0.1758800893625114, - "nauc_recall_at_1000_max": 0.025501344233019, - "nauc_recall_at_1000_std": 0.042539963927428814, - "nauc_recall_at_100_diff1": -0.2486522818740174, - "nauc_recall_at_100_max": 0.019163648439019607, - "nauc_recall_at_100_std": 0.08018065043566762, - "nauc_recall_at_10_diff1": -0.1643890021643603, - "nauc_recall_at_10_max": 0.015612163534741345, - "nauc_recall_at_10_std": 0.12932048693738263, - "nauc_recall_at_1_diff1": -0.05431107143472811, - "nauc_recall_at_1_max": 0.016884860597154504, - "nauc_recall_at_1_std": 0.11466554606849116, - "nauc_recall_at_20_diff1": -0.19900512249205043, - "nauc_recall_at_20_max": 0.013811903907386002, - "nauc_recall_at_20_std": 0.11661397478238389, - "nauc_recall_at_3_diff1": -0.11946531860608983, - "nauc_recall_at_3_max": 0.009964580844462759, - "nauc_recall_at_3_std": 0.13655833592145109, - "nauc_recall_at_5_diff1": -0.1255934399523269, - "nauc_recall_at_5_max": 0.012044411079830297, - "nauc_recall_at_5_std": 0.1305906448336473, - "ndcg_at_1": 0.99858, - "ndcg_at_10": 0.82209, - "ndcg_at_100": 0.74273, - "ndcg_at_1000": 0.56583, - "ndcg_at_20": 0.79415, - "ndcg_at_3": 0.89211, - "ndcg_at_5": 0.8591, - "precision_at_1": 0.99858, - "precision_at_10": 0.79054, - "precision_at_100": 0.72617, - "precision_at_1000": 0.49807, - "precision_at_20": 0.76664, - "precision_at_3": 0.86383, - "precision_at_5": 0.82752, - "recall_at_1": 0.00083, - "recall_at_10": 0.00647, - "recall_at_100": 0.05838, - "recall_at_1000": 0.3868, - "recall_at_20": 0.01248, - "recall_at_3": 0.00214, - "recall_at_5": 0.00341 - } - ] - }, - "task_name": "ROxfordI2IRetrieval" -} \ No newline at end of file diff 
--git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordMediumI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordMediumI2IRetrieval.json new file mode 100644 index 0000000000..27241284b1 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordMediumI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "3bd28e9c45e15f299117c634799f7035c4de2d31", + "evaluation_time": 17.523592710494995, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.75714, + "cv_recall_at_10": 0.87143, + "cv_recall_at_100": 0.94286, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 0.88571, + "cv_recall_at_3": 0.82857, + "cv_recall_at_5": 0.85714, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.75714, + "map_at_1": 0.04928, + "map_at_10": 0.14238, + "map_at_100": 0.25975, + "map_at_1000": 0.33, + "map_at_20": 0.1832, + "map_at_3": 0.08106, + "map_at_5": 0.10574, + "mrr_at_1": 0.7571428571428571, + "mrr_at_10": 0.7992857142857144, + "mrr_at_100": 0.8018753503269448, + "mrr_at_1000": 0.8022392070599913, + "mrr_at_20": 0.8001785714285715, + "mrr_at_3": 0.7904761904761906, + "mrr_at_5": 0.796904761904762, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": 0.15581232492997169, + "nauc_cv_recall_at_100_max": 0.2811624649859951, + "nauc_cv_recall_at_100_std": -0.12196545284780497, + "nauc_cv_recall_at_10_diff1": 0.394540014958862, + "nauc_cv_recall_at_10_max": 0.4610535313601873, + "nauc_cv_recall_at_10_std": -0.14937493321936154, + "nauc_cv_recall_at_1_diff1": 0.3099268419602683, + "nauc_cv_recall_at_1_max": 0.6277204689460957, + "nauc_cv_recall_at_1_std": 0.11781811503259924, + "nauc_cv_recall_at_20_diff1": 0.39261983863312777, + "nauc_cv_recall_at_20_max": 
0.3849074513526345, + "nauc_cv_recall_at_20_std": -0.18147840531561454, + "nauc_cv_recall_at_3_diff1": 0.29657669519420665, + "nauc_cv_recall_at_3_max": 0.5705233706385782, + "nauc_cv_recall_at_3_std": 0.04756418696510771, + "nauc_cv_recall_at_5_diff1": 0.37162293488824066, + "nauc_cv_recall_at_5_max": 0.5234207968901848, + "nauc_cv_recall_at_5_std": -0.06068999028182738, + "nauc_map_at_1000_diff1": -0.14810539444535628, + "nauc_map_at_1000_max": 0.42639970063449095, + "nauc_map_at_1000_std": 0.35965851851847036, + "nauc_map_at_100_diff1": -0.04654719636323819, + "nauc_map_at_100_max": 0.20124568398625026, + "nauc_map_at_100_std": 0.09779920001336984, + "nauc_map_at_10_diff1": 0.19553982602379108, + "nauc_map_at_10_max": -0.16962211647877953, + "nauc_map_at_10_std": -0.2049718676191143, + "nauc_map_at_1_diff1": 0.19831336261662813, + "nauc_map_at_1_max": -0.26048526225990315, + "nauc_map_at_1_std": -0.24848660044643464, + "nauc_map_at_20_diff1": 0.11533311201293248, + "nauc_map_at_20_max": -0.06940468788280867, + "nauc_map_at_20_std": -0.15394999023265438, + "nauc_map_at_3_diff1": 0.22661360439380449, + "nauc_map_at_3_max": -0.24582246289343465, + "nauc_map_at_3_std": -0.24358060859761363, + "nauc_map_at_5_diff1": 0.2391073300885911, + "nauc_map_at_5_max": -0.21313902222536138, + "nauc_map_at_5_std": -0.22555952910665222, + "nauc_mrr_at_1000_diff1": 0.3118072480358942, + "nauc_mrr_at_1000_max": 0.5896434235555521, + "nauc_mrr_at_1000_std": 0.07565750084460038, + "nauc_mrr_at_100_diff1": 0.3114581767470047, + "nauc_mrr_at_100_max": 0.5888175740478946, + "nauc_mrr_at_100_std": 0.07590573329917173, + "nauc_mrr_at_10_diff1": 0.31501956699434985, + "nauc_mrr_at_10_max": 0.5906694492065899, + "nauc_mrr_at_10_std": 0.07282707502971797, + "nauc_mrr_at_1_diff1": 0.3099268419602683, + "nauc_mrr_at_1_max": 0.6277204689460957, + "nauc_mrr_at_1_std": 0.11781811503259924, + "nauc_mrr_at_20_diff1": 0.31460997067981183, + "nauc_mrr_at_20_max": 0.5883036453949262, + 
"nauc_mrr_at_20_std": 0.07253787758405468, + "nauc_mrr_at_3_diff1": 0.298829153668517, + "nauc_mrr_at_3_max": 0.6036890630335515, + "nauc_mrr_at_3_std": 0.09929911274966623, + "nauc_mrr_at_5_diff1": 0.31309074738187426, + "nauc_mrr_at_5_max": 0.5968819745150035, + "nauc_mrr_at_5_std": 0.08124058142418439, + "nauc_ndcg_at_1000_diff1": 0.023759962305708315, + "nauc_ndcg_at_1000_max": 0.42118350886771483, + "nauc_ndcg_at_1000_std": 0.21503403862781068, + "nauc_ndcg_at_100_diff1": -0.10602196843045252, + "nauc_ndcg_at_100_max": 0.4040222401408388, + "nauc_ndcg_at_100_std": 0.3422251986340049, + "nauc_ndcg_at_10_diff1": -0.14230825595892613, + "nauc_ndcg_at_10_max": 0.5717619317460108, + "nauc_ndcg_at_10_std": 0.35188240284777716, + "nauc_ndcg_at_1_diff1": 0.3099268419602683, + "nauc_ndcg_at_1_max": 0.6277204689460957, + "nauc_ndcg_at_1_std": 0.11781811503259924, + "nauc_ndcg_at_20_diff1": -0.15743789243142528, + "nauc_ndcg_at_20_max": 0.5724054251457065, + "nauc_ndcg_at_20_std": 0.3797961049903619, + "nauc_ndcg_at_3_diff1": 0.02676887075350004, + "nauc_ndcg_at_3_max": 0.5535721465155271, + "nauc_ndcg_at_3_std": 0.22786549855388236, + "nauc_ndcg_at_5_diff1": -0.04449400646171614, + "nauc_ndcg_at_5_max": 0.555861797739005, + "nauc_ndcg_at_5_std": 0.27833851228984907, + "nauc_precision_at_1000_diff1": -0.19203752528557633, + "nauc_precision_at_1000_max": 0.4830680560318716, + "nauc_precision_at_1000_std": 0.5059538376358517, + "nauc_precision_at_100_diff1": -0.29506043721080727, + "nauc_precision_at_100_max": 0.658086670179558, + "nauc_precision_at_100_std": 0.6166722456503947, + "nauc_precision_at_10_diff1": -0.2282600490658613, + "nauc_precision_at_10_max": 0.5965276467257976, + "nauc_precision_at_10_std": 0.42049443291187055, + "nauc_precision_at_1_diff1": 0.3099268419602683, + "nauc_precision_at_1_max": 0.6277204689460957, + "nauc_precision_at_1_std": 0.11781811503259924, + "nauc_precision_at_20_diff1": -0.2777369235938004, + "nauc_precision_at_20_max": 
0.6422763272584597, + "nauc_precision_at_20_std": 0.46973331485105685, + "nauc_precision_at_3_diff1": -0.03905783426100687, + "nauc_precision_at_3_max": 0.548195217505554, + "nauc_precision_at_3_std": 0.2602422302381689, + "nauc_precision_at_5_diff1": -0.12812907648800725, + "nauc_precision_at_5_max": 0.5661902630852285, + "nauc_precision_at_5_std": 0.33254144330289526, + "nauc_recall_at_1000_diff1": 0.11552874226597364, + "nauc_recall_at_1000_max": 0.1991754113531379, + "nauc_recall_at_1000_std": 0.05667927260800616, + "nauc_recall_at_100_diff1": -0.01602892704072587, + "nauc_recall_at_100_max": -0.12257633928876718, + "nauc_recall_at_100_std": -0.01682143204953458, + "nauc_recall_at_10_diff1": 0.22298978148074566, + "nauc_recall_at_10_max": -0.22859971193805304, + "nauc_recall_at_10_std": -0.23715467924390157, + "nauc_recall_at_1_diff1": 0.19831336261662813, + "nauc_recall_at_1_max": -0.26048526225990315, + "nauc_recall_at_1_std": -0.24848660044643464, + "nauc_recall_at_20_diff1": 0.16037912600194174, + "nauc_recall_at_20_max": -0.1835918931237162, + "nauc_recall_at_20_std": -0.20833669696201074, + "nauc_recall_at_3_diff1": 0.2179234117693627, + "nauc_recall_at_3_max": -0.2525912771978034, + "nauc_recall_at_3_std": -0.24319572308223014, + "nauc_recall_at_5_diff1": 0.2530123469958082, + "nauc_recall_at_5_max": -0.22138719273185706, + "nauc_recall_at_5_std": -0.2215811361906131, + "ndcg_at_1": 0.75714, + "ndcg_at_10": 0.58385, + "ndcg_at_100": 0.48629, + "ndcg_at_1000": 0.59891, + "ndcg_at_20": 0.52663, + "ndcg_at_3": 0.6553, + "ndcg_at_5": 0.63094, + "precision_at_1": 0.75714, + "precision_at_10": 0.51286, + "precision_at_100": 0.227, + "precision_at_1000": 0.05964, + "precision_at_20": 0.42286, + "precision_at_3": 0.61429, + "precision_at_5": 0.57714, + "recall_at_1": 0.04928, + "recall_at_10": 0.17721, + "recall_at_100": 0.44393, + "recall_at_1000": 0.76857, + "recall_at_20": 0.24576, + "recall_at_3": 0.08507, + "recall_at_5": 0.11871 + } + ] + }, + "task_name": 
"ROxfordMediumI2IRetrieval" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisEasyI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisEasyI2IRetrieval.json new file mode 100644 index 0000000000..bd4a6c53b9 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisEasyI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "a7293da8a341de665ee4dcb2f209281df342d80b", + "evaluation_time": 22.57354736328125, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.94286, + "cv_recall_at_10": 1.0, + "cv_recall_at_100": 1.0, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 1.0, + "cv_recall_at_3": 1.0, + "cv_recall_at_5": 1.0, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.94286, + "map_at_1": 0.02765, + "map_at_10": 0.11901, + "map_at_100": 0.46776, + "map_at_1000": 0.58942, + "map_at_20": 0.19679, + "map_at_3": 0.05048, + "map_at_5": 0.07407, + "mrr_at_1": 0.9428571428571428, + "mrr_at_10": 0.9666666666666666, + "mrr_at_100": 0.9666666666666666, + "mrr_at_1000": 0.9666666666666666, + "mrr_at_20": 0.9666666666666666, + "mrr_at_3": 0.9666666666666666, + "mrr_at_5": 0.9666666666666666, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": NaN, + "nauc_cv_recall_at_100_max": NaN, + "nauc_cv_recall_at_100_std": NaN, + "nauc_cv_recall_at_10_diff1": NaN, + "nauc_cv_recall_at_10_max": NaN, + "nauc_cv_recall_at_10_std": NaN, + "nauc_cv_recall_at_1_diff1": -0.303338001867413, + "nauc_cv_recall_at_1_max": -0.11951447245564903, + "nauc_cv_recall_at_1_std": 0.38328664799253065, + "nauc_cv_recall_at_20_diff1": NaN, + "nauc_cv_recall_at_20_max": NaN, + "nauc_cv_recall_at_20_std": NaN, + "nauc_cv_recall_at_3_diff1": 
NaN, + "nauc_cv_recall_at_3_max": NaN, + "nauc_cv_recall_at_3_std": NaN, + "nauc_cv_recall_at_5_diff1": NaN, + "nauc_cv_recall_at_5_max": NaN, + "nauc_cv_recall_at_5_std": NaN, + "nauc_map_at_1000_diff1": -0.2774518254418231, + "nauc_map_at_1000_max": 0.25032079891265085, + "nauc_map_at_1000_std": 0.45081359384807485, + "nauc_map_at_100_diff1": -0.04907209740285267, + "nauc_map_at_100_max": 0.18686662832088577, + "nauc_map_at_100_std": 0.31839565681325593, + "nauc_map_at_10_diff1": 0.4545859442374117, + "nauc_map_at_10_max": -0.04495045809094941, + "nauc_map_at_10_std": 0.002947268581625076, + "nauc_map_at_1_diff1": 0.7049183369958152, + "nauc_map_at_1_max": -0.2616079702049637, + "nauc_map_at_1_std": -0.3618487407893112, + "nauc_map_at_20_diff1": 0.30890872824690435, + "nauc_map_at_20_max": 0.07581745603833642, + "nauc_map_at_20_std": 0.15307053171724933, + "nauc_map_at_3_diff1": 0.5935344781456238, + "nauc_map_at_3_max": -0.20146382202873225, + "nauc_map_at_3_std": -0.22821424999017603, + "nauc_map_at_5_diff1": 0.5703897004682528, + "nauc_map_at_5_max": -0.16308005302080678, + "nauc_map_at_5_std": -0.1620575175690372, + "nauc_mrr_at_1000_diff1": -0.39652527677737137, + "nauc_mrr_at_1000_max": -0.10590903027877008, + "nauc_mrr_at_1000_std": 0.452714419100977, + "nauc_mrr_at_100_diff1": -0.39652527677737137, + "nauc_mrr_at_100_max": -0.10590903027877008, + "nauc_mrr_at_100_std": 0.452714419100977, + "nauc_mrr_at_10_diff1": -0.39652527677737137, + "nauc_mrr_at_10_max": -0.10590903027877008, + "nauc_mrr_at_10_std": 0.452714419100977, + "nauc_mrr_at_1_diff1": -0.303338001867413, + "nauc_mrr_at_1_max": -0.11951447245564903, + "nauc_mrr_at_1_std": 0.38328664799253065, + "nauc_mrr_at_20_diff1": -0.39652527677737137, + "nauc_mrr_at_20_max": -0.10590903027877008, + "nauc_mrr_at_20_std": 0.452714419100977, + "nauc_mrr_at_3_diff1": -0.39652527677737137, + "nauc_mrr_at_3_max": -0.10590903027877008, + "nauc_mrr_at_3_std": 0.452714419100977, + "nauc_mrr_at_5_diff1": 
-0.39652527677737137, + "nauc_mrr_at_5_max": -0.10590903027877008, + "nauc_mrr_at_5_std": 0.452714419100977, + "nauc_ndcg_at_1000_diff1": -0.49032575238867226, + "nauc_ndcg_at_1000_max": 0.30827149571860624, + "nauc_ndcg_at_1000_std": 0.5422742306681069, + "nauc_ndcg_at_100_diff1": -0.2712801258824707, + "nauc_ndcg_at_100_max": 0.2469510949945573, + "nauc_ndcg_at_100_std": 0.4367311454283413, + "nauc_ndcg_at_10_diff1": -0.519586478031318, + "nauc_ndcg_at_10_max": 0.35401668386388874, + "nauc_ndcg_at_10_std": 0.5596598049318611, + "nauc_ndcg_at_1_diff1": -0.303338001867413, + "nauc_ndcg_at_1_max": -0.11951447245564903, + "nauc_ndcg_at_1_std": 0.38328664799253065, + "nauc_ndcg_at_20_diff1": -0.4606517361381654, + "nauc_ndcg_at_20_max": 0.38378869676003413, + "nauc_ndcg_at_20_std": 0.474186229263476, + "nauc_ndcg_at_3_diff1": -0.585149663762276, + "nauc_ndcg_at_3_max": 0.217548589813707, + "nauc_ndcg_at_3_std": 0.550167698282911, + "nauc_ndcg_at_5_diff1": -0.5267224367233376, + "nauc_ndcg_at_5_max": 0.2591401820511711, + "nauc_ndcg_at_5_std": 0.5702945720424282, + "nauc_precision_at_1000_diff1": -0.4499561707406712, + "nauc_precision_at_1000_max": 0.15634621233710536, + "nauc_precision_at_1000_std": 0.17441880422894795, + "nauc_precision_at_100_diff1": -0.4968893518170479, + "nauc_precision_at_100_max": 0.19447257428460527, + "nauc_precision_at_100_std": 0.25819757590889797, + "nauc_precision_at_10_diff1": -0.6167580528722199, + "nauc_precision_at_10_max": 0.3956822166454678, + "nauc_precision_at_10_std": 0.587829591941972, + "nauc_precision_at_1_diff1": -0.303338001867413, + "nauc_precision_at_1_max": -0.11951447245564903, + "nauc_precision_at_1_std": 0.38328664799253065, + "nauc_precision_at_20_diff1": -0.6031642916224456, + "nauc_precision_at_20_max": 0.4138166765495704, + "nauc_precision_at_20_std": 0.5161953717797159, + "nauc_precision_at_3_diff1": -0.6298090956856782, + "nauc_precision_at_3_max": 0.24751396267538517, + "nauc_precision_at_3_std": 
0.557806449101913, + "nauc_precision_at_5_diff1": -0.5860255772185323, + "nauc_precision_at_5_max": 0.29785990954926145, + "nauc_precision_at_5_std": 0.5944106636660691, + "nauc_recall_at_1000_diff1": -0.12209326635132679, + "nauc_recall_at_1000_max": 0.008543724924431836, + "nauc_recall_at_1000_std": 0.25794589209932395, + "nauc_recall_at_100_diff1": 0.16704714994186, + "nauc_recall_at_100_max": 0.053397233537995845, + "nauc_recall_at_100_std": 0.10488570154536475, + "nauc_recall_at_10_diff1": 0.5240194164262459, + "nauc_recall_at_10_max": -0.03784601294172749, + "nauc_recall_at_10_std": -0.07182820315172336, + "nauc_recall_at_1_diff1": 0.7049183369958152, + "nauc_recall_at_1_max": -0.2616079702049637, + "nauc_recall_at_1_std": -0.3618487407893112, + "nauc_recall_at_20_diff1": 0.40266587722079766, + "nauc_recall_at_20_max": 0.03757413630827814, + "nauc_recall_at_20_std": 0.042791117828726286, + "nauc_recall_at_3_diff1": 0.6179795805128587, + "nauc_recall_at_3_max": -0.1966728040865525, + "nauc_recall_at_3_std": -0.2470220135910208, + "nauc_recall_at_5_diff1": 0.6188095385694616, + "nauc_recall_at_5_max": -0.1447272403059871, + "nauc_recall_at_5_std": -0.20646683990606104, + "ndcg_at_1": 0.94286, + "ndcg_at_10": 0.85179, + "ndcg_at_100": 0.71775, + "ndcg_at_1000": 0.83657, + "ndcg_at_20": 0.82388, + "ndcg_at_3": 0.8936, + "ndcg_at_5": 0.88343, + "precision_at_1": 0.94286, + "precision_at_10": 0.81429, + "precision_at_100": 0.519, + "precision_at_1000": 0.09099, + "precision_at_20": 0.77143, + "precision_at_3": 0.87619, + "precision_at_5": 0.86286, + "recall_at_1": 0.02765, + "recall_at_10": 0.1307, + "recall_at_100": 0.60631, + "recall_at_1000": 0.93409, + "recall_at_20": 0.22272, + "recall_at_3": 0.05209, + "recall_at_5": 0.07985 + } + ] + }, + "task_name": "RParisEasyI2IRetrieval" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisHardI2IRetrieval.json 
b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisHardI2IRetrieval.json new file mode 100644 index 0000000000..5b15d0ae51 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisHardI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "fd121b6592fe946616fa85116703b94a4c61fd63", + "evaluation_time": 22.44923186302185, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.04286, + "cv_recall_at_10": 0.41429, + "cv_recall_at_100": 0.97143, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 0.6, + "cv_recall_at_3": 0.17143, + "cv_recall_at_5": 0.27143, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.04286, + "map_at_1": 0.00032, + "map_at_10": 0.00319, + "map_at_100": 0.04427, + "map_at_1000": 0.14044, + "map_at_20": 0.00602, + "map_at_3": 0.00096, + "map_at_5": 0.00146, + "mrr_at_1": 0.04285714285714286, + "mrr_at_10": 0.13515873015873012, + "mrr_at_100": 0.15938730320565236, + "mrr_at_1000": 0.15962520658618265, + "mrr_at_20": 0.14815883634769078, + "mrr_at_3": 0.0976190476190476, + "mrr_at_5": 0.11833333333333333, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": -0.6909430438842217, + "nauc_cv_recall_at_100_max": 0.6790382819794577, + "nauc_cv_recall_at_100_std": 0.4143323996265152, + "nauc_cv_recall_at_10_diff1": 0.2550034187718293, + "nauc_cv_recall_at_10_max": -0.26788387263688956, + "nauc_cv_recall_at_10_std": -0.3843993125496647, + "nauc_cv_recall_at_1_diff1": 0.3989804132009661, + "nauc_cv_recall_at_1_max": -0.12905822377247125, + "nauc_cv_recall_at_1_std": -0.401663536356319, + "nauc_cv_recall_at_20_diff1": 0.12233388457787381, + "nauc_cv_recall_at_20_max": -0.21169910528770744, + "nauc_cv_recall_at_20_std": -0.2534876818046898, + "nauc_cv_recall_at_3_diff1": 0.35100458047053923, 
+ "nauc_cv_recall_at_3_max": -0.1396262752446388, + "nauc_cv_recall_at_3_std": -0.314985425775557, + "nauc_cv_recall_at_5_diff1": 0.35853258558905104, + "nauc_cv_recall_at_5_max": -0.1935699390601907, + "nauc_cv_recall_at_5_std": -0.4389893304719292, + "nauc_map_at_1000_diff1": -0.048721647577313475, + "nauc_map_at_1000_max": 0.02185883585100512, + "nauc_map_at_1000_std": -0.014590760062175558, + "nauc_map_at_100_diff1": 0.20175041133547802, + "nauc_map_at_100_max": -0.2999406736946551, + "nauc_map_at_100_std": -0.21513200915624464, + "nauc_map_at_10_diff1": 0.4340980160657004, + "nauc_map_at_10_max": -0.30137691467001876, + "nauc_map_at_10_std": -0.4166728188377905, + "nauc_map_at_1_diff1": 0.3225400054210336, + "nauc_map_at_1_max": -0.20455804791357643, + "nauc_map_at_1_std": -0.3502703918125217, + "nauc_map_at_20_diff1": 0.4482686549814925, + "nauc_map_at_20_max": -0.33767777626178874, + "nauc_map_at_20_std": -0.39261152175985364, + "nauc_map_at_3_diff1": 0.3906475942798504, + "nauc_map_at_3_max": -0.1421273626640729, + "nauc_map_at_3_std": -0.3164227766935927, + "nauc_map_at_5_diff1": 0.40607500232859556, + "nauc_map_at_5_max": -0.21636348183658996, + "nauc_map_at_5_std": -0.385395422874435, + "nauc_mrr_at_1000_diff1": 0.33380716561285934, + "nauc_mrr_at_1000_max": -0.19720559281432273, + "nauc_mrr_at_1000_std": -0.3739725633585593, + "nauc_mrr_at_100_diff1": 0.3320423802626735, + "nauc_mrr_at_100_max": -0.19584551091041755, + "nauc_mrr_at_100_std": -0.37267220068248286, + "nauc_mrr_at_10_diff1": 0.33810313585026747, + "nauc_mrr_at_10_max": -0.20898927155954197, + "nauc_mrr_at_10_std": -0.39317334419879707, + "nauc_mrr_at_1_diff1": 0.3989804132009661, + "nauc_mrr_at_1_max": -0.12905822377247125, + "nauc_mrr_at_1_std": -0.401663536356319, + "nauc_mrr_at_20_diff1": 0.3270365458084192, + "nauc_mrr_at_20_max": -0.206603016304459, + "nauc_mrr_at_20_std": -0.3731460382346558, + "nauc_mrr_at_3_diff1": 0.3569994314378864, + "nauc_mrr_at_3_max": -0.17174174325462235, + 
"nauc_mrr_at_3_std": -0.3561501867618589, + "nauc_mrr_at_5_diff1": 0.3571575774665211, + "nauc_mrr_at_5_max": -0.18834228874209263, + "nauc_mrr_at_5_std": -0.40200287994954503, + "nauc_ndcg_at_1000_diff1": -0.0743652757576749, + "nauc_ndcg_at_1000_max": 0.09391843454021216, + "nauc_ndcg_at_1000_std": 0.08425685683454673, + "nauc_ndcg_at_100_diff1": 0.023516077444591114, + "nauc_ndcg_at_100_max": -0.11892313686780007, + "nauc_ndcg_at_100_std": -0.16710001090586352, + "nauc_ndcg_at_10_diff1": 0.3882520721308202, + "nauc_ndcg_at_10_max": -0.2924404653976282, + "nauc_ndcg_at_10_std": -0.4189010025783995, + "nauc_ndcg_at_1_diff1": 0.3989804132009661, + "nauc_ndcg_at_1_max": -0.12905822377247125, + "nauc_ndcg_at_1_std": -0.401663536356319, + "nauc_ndcg_at_20_diff1": 0.3805388505094879, + "nauc_ndcg_at_20_max": -0.30519760291519316, + "nauc_ndcg_at_20_std": -0.3772566200501027, + "nauc_ndcg_at_3_diff1": 0.412201762045363, + "nauc_ndcg_at_3_max": -0.17116948069763432, + "nauc_ndcg_at_3_std": -0.3608861222611007, + "nauc_ndcg_at_5_diff1": 0.35696244084680845, + "nauc_ndcg_at_5_max": -0.25186160853169576, + "nauc_ndcg_at_5_std": -0.400509495428499, + "nauc_precision_at_1000_diff1": -0.22093427992384315, + "nauc_precision_at_1000_max": 0.2196784647696767, + "nauc_precision_at_1000_std": 0.04361074681539203, + "nauc_precision_at_100_diff1": -0.016149306619419775, + "nauc_precision_at_100_max": 0.034799312614065006, + "nauc_precision_at_100_std": -0.13882957890950748, + "nauc_precision_at_10_diff1": 0.38489780804914814, + "nauc_precision_at_10_max": -0.30500607571435273, + "nauc_precision_at_10_std": -0.41532331666205985, + "nauc_precision_at_1_diff1": 0.3989804132009661, + "nauc_precision_at_1_max": -0.12905822377247125, + "nauc_precision_at_1_std": -0.401663536356319, + "nauc_precision_at_20_diff1": 0.3692528574157947, + "nauc_precision_at_20_max": -0.30111599366142605, + "nauc_precision_at_20_std": -0.35015003540161194, + "nauc_precision_at_3_diff1": 0.428985570425912, + 
"nauc_precision_at_3_max": -0.1563930424168448, + "nauc_precision_at_3_std": -0.3449801647848643, + "nauc_precision_at_5_diff1": 0.34983601813446513, + "nauc_precision_at_5_max": -0.2741390952059421, + "nauc_precision_at_5_std": -0.4086645123951, + "nauc_recall_at_1000_diff1": -0.08562125010420926, + "nauc_recall_at_1000_max": 0.07813883934551541, + "nauc_recall_at_1000_std": 0.23218650354926193, + "nauc_recall_at_100_diff1": -0.01041195221018056, + "nauc_recall_at_100_max": -0.23261397534923414, + "nauc_recall_at_100_std": -0.05979942942574269, + "nauc_recall_at_10_diff1": 0.3361904724346995, + "nauc_recall_at_10_max": -0.28110372866797806, + "nauc_recall_at_10_std": -0.36784022778417585, + "nauc_recall_at_1_diff1": 0.3225400054210336, + "nauc_recall_at_1_max": -0.20455804791357643, + "nauc_recall_at_1_std": -0.3502703918125217, + "nauc_recall_at_20_diff1": 0.31944679632949374, + "nauc_recall_at_20_max": -0.32588194303425905, + "nauc_recall_at_20_std": -0.25546971593163326, + "nauc_recall_at_3_diff1": 0.36197730614062007, + "nauc_recall_at_3_max": -0.07376636861137077, + "nauc_recall_at_3_std": -0.2538555661579059, + "nauc_recall_at_5_diff1": 0.3711994561836934, + "nauc_recall_at_5_max": -0.1684006987187415, + "nauc_recall_at_5_std": -0.373838797717674, + "ndcg_at_1": 0.04286, + "ndcg_at_10": 0.08888, + "ndcg_at_100": 0.20767, + "ndcg_at_1000": 0.46304, + "ndcg_at_20": 0.0984, + "ndcg_at_3": 0.06137, + "ndcg_at_5": 0.07145, + "precision_at_1": 0.04286, + "precision_at_10": 0.10143, + "precision_at_100": 0.202, + "precision_at_1000": 0.09564, + "precision_at_20": 0.10857, + "precision_at_3": 0.06667, + "precision_at_5": 0.08, + "recall_at_1": 0.00032, + "recall_at_10": 0.0087, + "recall_at_100": 0.18965, + "recall_at_1000": 0.67973, + "recall_at_20": 0.01989, + "recall_at_3": 0.00187, + "recall_at_5": 0.00327 + } + ] + }, + "task_name": "RParisHardI2IRetrieval" +} \ No newline at end of file diff --git 
a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisI2IRetrieval.json deleted file mode 100644 index 0f309a07f3..0000000000 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisI2IRetrieval.json +++ /dev/null @@ -1,186 +0,0 @@ -{ - "dataset_revision": "bafc3a08fdffd72558021ce3a41250833d7e0e88", - "evaluation_time": 43.56014585494995, - "kg_co2_emissions": null, - "mteb_version": "1.12.90", - "scores": { - "test": [ - { - "cv_recall_at_1": 0.78348, - "cv_recall_at_10": 0.96558, - "cv_recall_at_100": 0.99812, - "cv_recall_at_1000": 1.0, - "cv_recall_at_20": 0.98217, - "cv_recall_at_3": 0.89972, - "cv_recall_at_5": 0.93367, - "hf_subset": "default", - "languages": [ - "eng-Latn" - ], - "main_score": 0.78348, - "map_at_1": 0.00183, - "map_at_10": 0.01259, - "map_at_100": 0.08554, - "map_at_1000": 0.22541, - "map_at_20": 0.02257, - "map_at_3": 0.0046, - "map_at_5": 0.00704, - "mrr_at_1": 0.9870150187734669, - "mrr_at_10": 0.9935075093867334, - "mrr_at_100": 0.9935075093867334, - "mrr_at_1000": 0.9935075093867334, - "mrr_at_20": 0.9935075093867334, - "mrr_at_3": 0.9935075093867334, - "mrr_at_5": 0.9935075093867334, - "nauc_cv_recall_at_1000_diff1": NaN, - "nauc_cv_recall_at_1000_max": NaN, - "nauc_cv_recall_at_1000_std": NaN, - "nauc_cv_recall_at_100_diff1": 0.12564420871970045, - "nauc_cv_recall_at_100_max": 0.3655746256621164, - "nauc_cv_recall_at_100_std": -0.2306365991736563, - "nauc_cv_recall_at_10_diff1": -0.5058662936618029, - "nauc_cv_recall_at_10_max": 0.09207458773253613, - "nauc_cv_recall_at_10_std": 0.4663743642051124, - "nauc_cv_recall_at_1_diff1": -0.4013157463788695, - "nauc_cv_recall_at_1_max": 0.013502025515305102, - "nauc_cv_recall_at_1_std": 0.32242775071432206, - "nauc_cv_recall_at_20_diff1": -0.5198855681306671, - "nauc_cv_recall_at_20_max": 
0.10151852705481003, - "nauc_cv_recall_at_20_std": 0.41412564884036973, - "nauc_cv_recall_at_3_diff1": -0.5198700551458323, - "nauc_cv_recall_at_3_max": 0.025885833103837443, - "nauc_cv_recall_at_3_std": 0.4187563971498901, - "nauc_cv_recall_at_5_diff1": -0.5209006482350635, - "nauc_cv_recall_at_5_max": 0.051797269624278094, - "nauc_cv_recall_at_5_std": 0.4360707529546872, - "nauc_map_at_1000_diff1": -0.32518052685711313, - "nauc_map_at_1000_max": 0.0038005812075390727, - "nauc_map_at_1000_std": 0.43958508353436493, - "nauc_map_at_100_diff1": -0.324729620306574, - "nauc_map_at_100_max": 0.003712984860206863, - "nauc_map_at_100_std": 0.3530965583894132, - "nauc_map_at_10_diff1": -0.2865369016119553, - "nauc_map_at_10_max": 0.012519773293503903, - "nauc_map_at_10_std": 0.23475872496725028, - "nauc_map_at_1_diff1": -0.0855732613663, - "nauc_map_at_1_max": 0.023221460373481606, - "nauc_map_at_1_std": 0.03199412568851411, - "nauc_map_at_20_diff1": -0.3089685409009752, - "nauc_map_at_20_max": 0.008164069309369689, - "nauc_map_at_20_std": 0.28138880870717525, - "nauc_map_at_3_diff1": -0.21596259885289204, - "nauc_map_at_3_max": 0.026694429615608712, - "nauc_map_at_3_std": 0.13815328440691188, - "nauc_map_at_5_diff1": -0.2539278390954876, - "nauc_map_at_5_max": 0.01952941733527699, - "nauc_map_at_5_std": 0.18318190468395143, - "nauc_mrr_at_1000_diff1": 1.0, - "nauc_mrr_at_1000_max": -0.13028347793646336, - "nauc_mrr_at_1000_std": -0.28303491792776114, - "nauc_mrr_at_100_diff1": 1.0, - "nauc_mrr_at_100_max": -0.13028347793646336, - "nauc_mrr_at_100_std": -0.28303491792776114, - "nauc_mrr_at_10_diff1": 1.0, - "nauc_mrr_at_10_max": -0.13028347793646336, - "nauc_mrr_at_10_std": -0.28303491792776114, - "nauc_mrr_at_1_diff1": 1.0, - "nauc_mrr_at_1_max": -0.1302834779364721, - "nauc_mrr_at_1_std": -0.2830349179277583, - "nauc_mrr_at_20_diff1": 1.0, - "nauc_mrr_at_20_max": -0.13028347793646336, - "nauc_mrr_at_20_std": -0.28303491792776114, - "nauc_mrr_at_3_diff1": 1.0, - 
"nauc_mrr_at_3_max": -0.13028347793646336, - "nauc_mrr_at_3_std": -0.28303491792776114, - "nauc_mrr_at_5_diff1": 1.0, - "nauc_mrr_at_5_max": -0.13028347793646336, - "nauc_mrr_at_5_std": -0.28303491792776114, - "nauc_ndcg_at_1000_diff1": -0.3461037602745701, - "nauc_ndcg_at_1000_max": 0.01170706578078914, - "nauc_ndcg_at_1000_std": 0.40425946623874776, - "nauc_ndcg_at_100_diff1": -0.39901516979641344, - "nauc_ndcg_at_100_max": -0.00685883548003498, - "nauc_ndcg_at_100_std": 0.465350992380673, - "nauc_ndcg_at_10_diff1": -0.3955198632793018, - "nauc_ndcg_at_10_max": -0.0037363142445774643, - "nauc_ndcg_at_10_std": 0.38110734984115535, - "nauc_ndcg_at_1_diff1": 1.0, - "nauc_ndcg_at_1_max": -0.14486457971944697, - "nauc_ndcg_at_1_std": -0.2830349179277583, - "nauc_ndcg_at_20_diff1": -0.41278062763010503, - "nauc_ndcg_at_20_max": -0.004138702255930161, - "nauc_ndcg_at_20_std": 0.4191652248206659, - "nauc_ndcg_at_3_diff1": -0.34367743707797543, - "nauc_ndcg_at_3_max": 0.004103711367444123, - "nauc_ndcg_at_3_std": 0.3101278349486688, - "nauc_ndcg_at_5_diff1": -0.37388771810166976, - "nauc_ndcg_at_5_max": -0.004694760292318884, - "nauc_ndcg_at_5_std": 0.3429402757237829, - "nauc_precision_at_1000_diff1": -0.1804983224177905, - "nauc_precision_at_1000_max": -0.01148320196422333, - "nauc_precision_at_1000_std": 0.2504460959613102, - "nauc_precision_at_100_diff1": -0.3911848294276243, - "nauc_precision_at_100_max": -0.007414470176849031, - "nauc_precision_at_100_std": 0.4673613835957893, - "nauc_precision_at_10_diff1": -0.4052976521224889, - "nauc_precision_at_10_max": -0.00433906156493395, - "nauc_precision_at_10_std": 0.39212256495307285, - "nauc_precision_at_1_diff1": 1.0, - "nauc_precision_at_1_max": -0.14486457971944697, - "nauc_precision_at_1_std": -0.2830349179277583, - "nauc_precision_at_20_diff1": -0.4173913678934324, - "nauc_precision_at_20_max": -0.004543026048413097, - "nauc_precision_at_20_std": 0.4299497661264597, - "nauc_precision_at_3_diff1": 
-0.37619357586668295, - "nauc_precision_at_3_max": 0.006637408448532989, - "nauc_precision_at_3_std": 0.3255087096886432, - "nauc_precision_at_5_diff1": -0.393543490911112, - "nauc_precision_at_5_max": -0.0054501833188476565, - "nauc_precision_at_5_std": 0.3553468241787662, - "nauc_recall_at_1000_diff1": -0.28865460441970253, - "nauc_recall_at_1000_max": 0.02031269144932309, - "nauc_recall_at_1000_std": 0.3581111189760722, - "nauc_recall_at_100_diff1": -0.31737603693043515, - "nauc_recall_at_100_max": 0.010815413934469631, - "nauc_recall_at_100_std": 0.32072251393381573, - "nauc_recall_at_10_diff1": -0.2835809857944858, - "nauc_recall_at_10_max": 0.01631628873177338, - "nauc_recall_at_10_std": 0.2203863362341707, - "nauc_recall_at_1_diff1": -0.0855732613663, - "nauc_recall_at_1_max": 0.023221460373481606, - "nauc_recall_at_1_std": 0.03199412568851411, - "nauc_recall_at_20_diff1": -0.30410786586422606, - "nauc_recall_at_20_max": 0.012058890403941508, - "nauc_recall_at_20_std": 0.2609223499236706, - "nauc_recall_at_3_diff1": -0.22686364377869106, - "nauc_recall_at_3_max": 0.02838523233952593, - "nauc_recall_at_3_std": 0.13768932686394217, - "nauc_recall_at_5_diff1": -0.2568046605350692, - "nauc_recall_at_5_max": 0.02092009985156803, - "nauc_recall_at_5_std": 0.17610456507060399, - "ndcg_at_1": 0.98702, - "ndcg_at_10": 0.76647, - "ndcg_at_100": 0.59394, - "ndcg_at_1000": 0.44945, - "ndcg_at_20": 0.71423, - "ndcg_at_3": 0.8678, - "ndcg_at_5": 0.82287, - "precision_at_1": 0.98702, - "precision_at_10": 0.72226, - "precision_at_100": 0.5574, - "precision_at_1000": 0.25148, - "precision_at_20": 0.67042, - "precision_at_3": 0.83521, - "precision_at_5": 0.7832, - "recall_at_1": 0.00183, - "recall_at_10": 0.01349, - "recall_at_100": 0.10451, - "recall_at_1000": 0.40829, - "recall_at_20": 0.02511, - "recall_at_3": 0.00467, - "recall_at_5": 0.0073 - } - ] - }, - "task_name": "RParisI2IRetrieval" -} \ No newline at end of file diff --git 
a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisMediumI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisMediumI2IRetrieval.json new file mode 100644 index 0000000000..700a4a2774 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisMediumI2IRetrieval.json @@ -0,0 +1,186 @@ +{ + "dataset_revision": "900267b49003a086979e8d52f6942624236bfc34", + "evaluation_time": 23.04814124107361, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "cv_recall_at_1": 0.98571, + "cv_recall_at_10": 1.0, + "cv_recall_at_100": 1.0, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 1.0, + "cv_recall_at_3": 1.0, + "cv_recall_at_5": 1.0, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.98571, + "map_at_1": 0.0055, + "map_at_10": 0.0505, + "map_at_100": 0.32128, + "map_at_1000": 0.53905, + "map_at_20": 0.09538, + "map_at_3": 0.01586, + "map_at_5": 0.02626, + "mrr_at_1": 0.9857142857142858, + "mrr_at_10": 0.9928571428571429, + "mrr_at_100": 0.9928571428571429, + "mrr_at_1000": 0.9928571428571429, + "mrr_at_20": 0.9928571428571429, + "mrr_at_3": 0.9928571428571429, + "mrr_at_5": 0.9928571428571429, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": NaN, + "nauc_cv_recall_at_100_max": NaN, + "nauc_cv_recall_at_100_std": NaN, + "nauc_cv_recall_at_10_diff1": NaN, + "nauc_cv_recall_at_10_max": NaN, + "nauc_cv_recall_at_10_std": NaN, + "nauc_cv_recall_at_1_diff1": 0.8692810457516342, + "nauc_cv_recall_at_1_max": -1.151727357609713, + "nauc_cv_recall_at_1_std": -0.5634920634920669, + "nauc_cv_recall_at_20_diff1": NaN, + "nauc_cv_recall_at_20_max": NaN, + "nauc_cv_recall_at_20_std": NaN, + "nauc_cv_recall_at_3_diff1": NaN, + "nauc_cv_recall_at_3_max": NaN, + "nauc_cv_recall_at_3_std": 
NaN, + "nauc_cv_recall_at_5_diff1": NaN, + "nauc_cv_recall_at_5_max": NaN, + "nauc_cv_recall_at_5_std": NaN, + "nauc_map_at_1000_diff1": -0.5202144222378728, + "nauc_map_at_1000_max": 0.11126568280766698, + "nauc_map_at_1000_std": 0.32813155264948524, + "nauc_map_at_100_diff1": -0.17082735069359992, + "nauc_map_at_100_max": -0.07951969633170079, + "nauc_map_at_100_std": 0.13798406208276, + "nauc_map_at_10_diff1": 0.08925396593997569, + "nauc_map_at_10_max": -0.078676032579074, + "nauc_map_at_10_std": 0.13021224342356594, + "nauc_map_at_1_diff1": 0.20034146659034022, + "nauc_map_at_1_max": -0.1488526539306114, + "nauc_map_at_1_std": 0.036837238597286596, + "nauc_map_at_20_diff1": 0.044029095752394776, + "nauc_map_at_20_max": -0.044115438949865464, + "nauc_map_at_20_std": 0.16589359171211718, + "nauc_map_at_3_diff1": 0.12936241106392424, + "nauc_map_at_3_max": -0.11171026414768116, + "nauc_map_at_3_std": 0.09557523635145118, + "nauc_map_at_5_diff1": 0.12090379123610429, + "nauc_map_at_5_max": -0.11656881579212829, + "nauc_map_at_5_std": 0.09921919672363319, + "nauc_mrr_at_1000_diff1": 0.8692810457516355, + "nauc_mrr_at_1000_max": -1.1517273576096931, + "nauc_mrr_at_1000_std": -0.5634920634920526, + "nauc_mrr_at_100_diff1": 0.8692810457516355, + "nauc_mrr_at_100_max": -1.1517273576096931, + "nauc_mrr_at_100_std": -0.5634920634920526, + "nauc_mrr_at_10_diff1": 0.8692810457516355, + "nauc_mrr_at_10_max": -1.1517273576096931, + "nauc_mrr_at_10_std": -0.5634920634920526, + "nauc_mrr_at_1_diff1": 0.8692810457516342, + "nauc_mrr_at_1_max": -1.151727357609713, + "nauc_mrr_at_1_std": -0.5634920634920669, + "nauc_mrr_at_20_diff1": 0.8692810457516355, + "nauc_mrr_at_20_max": -1.1517273576096931, + "nauc_mrr_at_20_std": -0.5634920634920526, + "nauc_mrr_at_3_diff1": 0.8692810457516355, + "nauc_mrr_at_3_max": -1.1517273576096931, + "nauc_mrr_at_3_std": -0.5634920634920526, + "nauc_mrr_at_5_diff1": 0.8692810457516355, + "nauc_mrr_at_5_max": -1.1517273576096931, + 
"nauc_mrr_at_5_std": -0.5634920634920526, + "nauc_ndcg_at_1000_diff1": -0.44820569134469457, + "nauc_ndcg_at_1000_max": 0.14278328260517373, + "nauc_ndcg_at_1000_std": 0.35909204639356485, + "nauc_ndcg_at_100_diff1": -0.6018888319473394, + "nauc_ndcg_at_100_max": 0.21192336225080224, + "nauc_ndcg_at_100_std": 0.25475321800547673, + "nauc_ndcg_at_10_diff1": -0.5815570895956712, + "nauc_ndcg_at_10_max": 0.2384364105122267, + "nauc_ndcg_at_10_std": 0.47588628797056887, + "nauc_ndcg_at_1_diff1": 0.8692810457516342, + "nauc_ndcg_at_1_max": -1.151727357609713, + "nauc_ndcg_at_1_std": -0.5634920634920669, + "nauc_ndcg_at_20_diff1": -0.5953841029416077, + "nauc_ndcg_at_20_max": 0.32157620990503083, + "nauc_ndcg_at_20_std": 0.47071964995241883, + "nauc_ndcg_at_3_diff1": -0.47333802126140007, + "nauc_ndcg_at_3_max": 0.13407004770022382, + "nauc_ndcg_at_3_std": 0.46679765020950414, + "nauc_ndcg_at_5_diff1": -0.5587389095148461, + "nauc_ndcg_at_5_max": 0.07339891244234566, + "nauc_ndcg_at_5_std": 0.47215237618630984, + "nauc_precision_at_1000_diff1": -0.3950971876695561, + "nauc_precision_at_1000_max": 0.24406161426957987, + "nauc_precision_at_1000_std": 0.12174947607128742, + "nauc_precision_at_100_diff1": -0.5472068296947269, + "nauc_precision_at_100_max": 0.23378022737023205, + "nauc_precision_at_100_std": 0.1829472692928344, + "nauc_precision_at_10_diff1": -0.5950517610712319, + "nauc_precision_at_10_max": 0.28672134365425656, + "nauc_precision_at_10_std": 0.48780862485384807, + "nauc_precision_at_1_diff1": 0.8692810457516342, + "nauc_precision_at_1_max": -1.151727357609713, + "nauc_precision_at_1_std": -0.5634920634920669, + "nauc_precision_at_20_diff1": -0.5947364591012498, + "nauc_precision_at_20_max": 0.35422908868857506, + "nauc_precision_at_20_std": 0.4705389149562903, + "nauc_precision_at_3_diff1": -0.510094916344916, + "nauc_precision_at_3_max": 0.22297297297297156, + "nauc_precision_at_3_std": 0.5162886100386108, + "nauc_precision_at_5_diff1": -0.6028283796740131, 
+ "nauc_precision_at_5_max": 0.11490891658676922, + "nauc_precision_at_5_std": 0.504458293384468, + "nauc_recall_at_1000_diff1": -0.2460203893111549, + "nauc_recall_at_1000_max": 0.07681666757366377, + "nauc_recall_at_1000_std": 0.29402261311504047, + "nauc_recall_at_100_diff1": -0.10390008545301123, + "nauc_recall_at_100_max": -0.09447732649357599, + "nauc_recall_at_100_std": 0.0928830407002215, + "nauc_recall_at_10_diff1": 0.10590480631127752, + "nauc_recall_at_10_max": -0.08162621409653509, + "nauc_recall_at_10_std": 0.11847839880663995, + "nauc_recall_at_1_diff1": 0.20034146659034022, + "nauc_recall_at_1_max": -0.1488526539306114, + "nauc_recall_at_1_std": 0.036837238597286596, + "nauc_recall_at_20_diff1": 0.06586007946887822, + "nauc_recall_at_20_max": -0.05281307817884481, + "nauc_recall_at_20_std": 0.14970188120798883, + "nauc_recall_at_3_diff1": 0.13126830970729877, + "nauc_recall_at_3_max": -0.10596900293507791, + "nauc_recall_at_3_std": 0.09476793236811286, + "nauc_recall_at_5_diff1": 0.12983221018552543, + "nauc_recall_at_5_max": -0.11532911944953175, + "nauc_recall_at_5_std": 0.09212571294283788, + "ndcg_at_1": 0.98571, + "ndcg_at_10": 0.92841, + "ndcg_at_100": 0.77588, + "ndcg_at_1000": 0.7685, + "ndcg_at_20": 0.89894, + "ndcg_at_3": 0.95291, + "ndcg_at_5": 0.94991, + "precision_at_1": 0.98571, + "precision_at_10": 0.91571, + "precision_at_100": 0.721, + "precision_at_1000": 0.18663, + "precision_at_20": 0.88, + "precision_at_3": 0.94286, + "precision_at_5": 0.94286, + "recall_at_1": 0.0055, + "recall_at_10": 0.05165, + "recall_at_100": 0.36172, + "recall_at_1000": 0.79065, + "recall_at_20": 0.0987, + "recall_at_3": 0.01595, + "recall_at_5": 0.0266 + } + ] + }, + "task_name": "RParisMediumI2IRetrieval" +} \ No newline at end of file From de9c57388b96d5fc2cc730b4e55a2eda8390866f Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 21 Oct 2024 16:25:24 +0100 Subject: [PATCH 64/73] make lint --- 
mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py | 4 +++- mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py index ad6837bc99..a564d6bb31 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py @@ -49,6 +49,7 @@ class ROxfordEasyI2IRetrieval(AbsTaskAny2AnyRetrieval): ) skip_first_result = False + class ROxfordMediumI2IRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( name="ROxfordMediumI2IRetrieval", @@ -94,6 +95,7 @@ class ROxfordMediumI2IRetrieval(AbsTaskAny2AnyRetrieval): ) skip_first_result = False + class ROxfordHardI2IRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( name="ROxfordHardI2IRetrieval", @@ -137,4 +139,4 @@ class ROxfordHardI2IRetrieval(AbsTaskAny2AnyRetrieval): }, }, ) - skip_first_result = False \ No newline at end of file + skip_first_result = False diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py index e5687ce125..3c3611a54e 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py @@ -49,6 +49,7 @@ class RParisEasyI2IRetrieval(AbsTaskAny2AnyRetrieval): ) skip_first_result = False + class RParisMediumI2IRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( name="RParisMediumI2IRetrieval", @@ -94,6 +95,7 @@ class RParisMediumI2IRetrieval(AbsTaskAny2AnyRetrieval): ) skip_first_result = False + class RParisHardI2IRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( name="RParisHardI2IRetrieval", @@ -137,4 +139,4 @@ class RParisHardI2IRetrieval(AbsTaskAny2AnyRetrieval): }, }, ) - skip_first_result = False \ No newline at end of file + skip_first_result 
= False From 50af768ed3b8de391ca67161bb2e0e6eabc2d02d Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Mon, 28 Oct 2024 14:45:25 +0000 Subject: [PATCH 65/73] add BLINK as multi choice tasks --- .../Image/Any2AnyMultiChoice/__init__.py | 2 + .../eng/BLINKIT2IMultiChoice.py | 49 +++++++++++++++++++ .../eng/BLINKIT2TMultiChoice.py | 48 ++++++++++++++++++ .../eng/ImageCoDeT2IMultiChoice.py | 4 +- .../BLINKIT2IMultiChoice.json | 33 +++++++++++++ .../BLINKIT2TMultiChoice.json | 33 +++++++++++++ 6 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py create mode 100644 mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json create mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/__init__.py b/mteb/tasks/Image/Any2AnyMultiChoice/__init__.py index b317e8cabd..c818af7048 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/__init__.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/__init__.py @@ -1,3 +1,5 @@ from __future__ import annotations +from .eng.BLINKIT2IMultiChoice import * +from .eng.BLINKIT2TMultiChoice import * from .eng.ImageCoDeT2IMultiChoice import * diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py new file mode 100644 index 0000000000..ee376f1631 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyMultiChoice import AbsTaskAny2AnyMultiChoice +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): + metadata = TaskMetadata( + name="BLINKIT2IMultiChoice", + 
description="Retrieve images based on images and specific retrieval instructions.", + reference="https://arxiv.org/abs/2404.12390", + dataset={ + "path": "JamieSJS/blink-it2i-multi", + "revision": "780ade70cd769e586502a61dda903e525f945a45", + "trust_remote_code": True, + }, + type="Any2AnyMultiChoice", + category="it2i", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2018-01-01", "2018-12-31"), + domains=["Encyclopaedic"], + task_subtypes=["Image Text Retrieval"], + license="CC BY-SA 4.0", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="found", + bibtex_citation="""@article{fu2024blink, + title={Blink: Multimodal large language models can see but not perceive}, + author={Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, + journal={arXiv preprint arXiv:2404.12390}, + year={2024} +} +""", + descriptive_stats={ + "n_samples": {"test": 402}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 804, + "num_queries": 402, + "average_relevant_docs_per_query": 1, + } + }, + }, + ) diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py new file mode 100644 index 0000000000..3af244dc28 --- /dev/null +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from mteb.abstasks.Image.AbsTaskAny2AnyMultiChoice import AbsTaskAny2AnyMultiChoice +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): + metadata = TaskMetadata( + name="BLINKIT2TMultiChoice", + description="Retrieve the correct text answer based on images and specific retrieval instructions.", + reference="https://arxiv.org/abs/2404.12390", + 
dataset={ + "path": "JamieSJS/blink-it2t-multi", + "revision": "b6e18eba186cada040ddb72e8e3cb92edd7ca5e9", + }, + type="Any2AnyMultiChoice", + category="it2t", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2018-01-01", "2018-12-31"), + domains=["Encyclopaedic"], + task_subtypes=["Image Text Retrieval"], + license="CC BY-SA 4.0", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="found", + bibtex_citation="""@article{fu2024blink, + title={Blink: Multimodal large language models can see but not perceive}, + author={Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, + journal={arXiv preprint arXiv:2404.12390}, + year={2024} +} +""", + descriptive_stats={ + "n_samples": {"test": 1073}, + "avg_character_length": { + "test": { + "average_document_length": 0.0, + "average_query_length": 0.0, + "num_documents": 26, + "num_queries": 1073, + "average_relevant_docs_per_query": 1, + } + }, + }, + ) diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py index 46fbb5b990..160ba0d39c 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py @@ -14,10 +14,10 @@ class ImageCoDeT2IMultiChoice(AbsTaskAny2AnyMultiChoice): "revision": "d28adfd8b34fefa546fdf94bdc352622b2575f6c", }, type="Any2AnyMultiChoice", - category="t2i", + category="it2i", eval_splits=["test"], eval_langs=["eng-Latn"], - main_score="ndcg_at_1", + main_score="accuracy", date=("2022-05-22", "2022-05-27"), # conference dates form=["written"], domains=["Web"], diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json 
b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json new file mode 100644 index 0000000000..648d3aa59e --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json @@ -0,0 +1,33 @@ +{ + "dataset_revision": "780ade70cd769e586502a61dda903e525f945a45", + "evaluation_time": 56.62301731109619, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.70149, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.70149, + "mrr_at_1": 0.7014925373134329, + "mrr_at_10": 0.8507462686567164, + "mrr_at_100": 0.8507462686567164, + "mrr_at_1000": 0.8507462686567164, + "mrr_at_20": 0.8507462686567164, + "mrr_at_3": 0.8507462686567164, + "mrr_at_5": 0.8507462686567164, + "ndcg_at_1": 0.70149, + "ndcg_at_10": 0.88983, + "ndcg_at_100": 0.88983, + "ndcg_at_1000": 0.88983, + "ndcg_at_20": 0.88983, + "ndcg_at_3": 0.88983, + "ndcg_at_5": 0.88983 + } + ] + }, + "task_name": "BLINKIT2IMultiChoice" +} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json new file mode 100644 index 0000000000..bda6cd2cb9 --- /dev/null +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json @@ -0,0 +1,33 @@ +{ + "dataset_revision": "b6e18eba186cada040ddb72e8e3cb92edd7ca5e9", + "evaluation_time": 43.71325731277466, + "kg_co2_emissions": null, + "mteb_version": "1.12.90", + "scores": { + "test": [ + { + "accuracy": 0.38397, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ], + "main_score": 0.38397, + "mrr_at_1": 0.38397017707362535, + "mrr_at_10": 0.650512581547066, + "mrr_at_100": 0.650512581547066, + "mrr_at_1000": 0.650512581547066, + 
"mrr_at_20": 0.650512581547066, + "mrr_at_3": 0.6295433364398889, + "mrr_at_5": 0.650512581547066, + "ndcg_at_1": 0.38397, + "ndcg_at_10": 0.73974, + "ndcg_at_100": 0.73974, + "ndcg_at_1000": 0.73974, + "ndcg_at_20": 0.73974, + "ndcg_at_3": 0.70361, + "ndcg_at_5": 0.73974 + } + ] + }, + "task_name": "BLINKIT2TMultiChoice" +} \ No newline at end of file From 27444fd1567335036664e8c4b08fa30982d35b6f Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Tue, 29 Oct 2024 14:06:38 +0000 Subject: [PATCH 66/73] fix: license metadata in wrong format --- mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py | 2 +- mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py index ee376f1631..d600eaa4f2 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py @@ -22,7 +22,7 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): date=("2018-01-01", "2018-12-31"), domains=["Encyclopaedic"], task_subtypes=["Image Text Retrieval"], - license="CC BY-SA 4.0", + license="not specified", annotations_creators="derived", dialect=[], modalities=["text", "image"], diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py index 3af244dc28..fe37216de0 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py @@ -21,7 +21,7 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): date=("2018-01-01", "2018-12-31"), domains=["Encyclopaedic"], task_subtypes=["Image Text Retrieval"], - license="CC BY-SA 4.0", + license="not specified", annotations_creators="derived", dialect=[], modalities=["text", "image"], From 
1abce84f24a2702ca5474683fcaf5cbab7d94c28 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 31 Oct 2024 14:58:53 +0000 Subject: [PATCH 67/73] remove null examples from corpus of ROxford and RParis --- .../eng/ROxfordI2IRetrieval.py | 12 +- .../eng/RParisI2IRetrieval.py | 10 +- .../ROxfordEasyI2IRetrieval.json | 334 ++++++++--------- .../ROxfordHardI2IRetrieval.json | 342 +++++++++--------- .../ROxfordMediumI2IRetrieval.json | 334 ++++++++--------- .../RParisEasyI2IRetrieval.json | 294 +++++++-------- .../RParisHardI2IRetrieval.json | 334 ++++++++--------- .../RParisMediumI2IRetrieval.json | 294 +++++++-------- .../model_meta.json | 2 +- 9 files changed, 978 insertions(+), 978 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py index e338eec2d1..dbec8e6ae7 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py @@ -11,7 +11,7 @@ class ROxfordEasyI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Oxford_and_CVPR_2018_paper.html", dataset={ "path": "JamieSJS/r-oxford-easy", - "revision": "3f018eb7ad32218a5a4ebd704493e0834a265cf5", + "revision": "b71b5f67a93aa63761b79a67bcf28bd2ae590902", }, type="Any2AnyRetrieval", category="i2i", @@ -40,7 +40,7 @@ class ROxfordEasyI2IRetrieval(AbsTaskAny2AnyRetrieval): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 4993, + "num_documents": 516, "num_queries": 70, "average_relevant_docs_per_query": 43.3, } @@ -57,7 +57,7 @@ class ROxfordMediumI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Oxford_and_CVPR_2018_paper.html", dataset={ "path": "JamieSJS/r-oxford-medium", - "revision": "3bd28e9c45e15f299117c634799f7035c4de2d31", + "revision": 
"1dfb86730ee4b3f49b441f4896d473c83eb5ff0d", }, type="Any2AnyRetrieval", category="i2i", @@ -86,7 +86,7 @@ class ROxfordMediumI2IRetrieval(AbsTaskAny2AnyRetrieval): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 4993, + "num_documents": 788, "num_queries": 70, "average_relevant_docs_per_query": 78.9, } @@ -103,7 +103,7 @@ class ROxfordHardI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Oxford_and_CVPR_2018_paper.html", dataset={ "path": "JamieSJS/r-oxford-hard", - "revision": "f20b30211b7ba3fc64a02bd83998fe75f3023719", + "revision": "f71ab9d4aabcda93d55a7e65edfb3a34767d89e6", }, type="Any2AnyRetrieval", category="i2i", @@ -132,7 +132,7 @@ class ROxfordHardI2IRetrieval(AbsTaskAny2AnyRetrieval): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 4993, + "num_documents": 685, "num_queries": 70, "average_relevant_docs_per_query": 35.7, } diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py index f29a9849ef..8c2f6344fb 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py @@ -11,7 +11,7 @@ class RParisEasyI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Paris_and_CVPR_2018_paper.html", dataset={ "path": "JamieSJS/r-paris-easy", - "revision": "a7293da8a341de665ee4dcb2f209281df342d80b", + "revision": "7d821ddebcb30ad343133e3a81e23347ac2a08a8", }, type="Any2AnyRetrieval", category="i2i", @@ -40,7 +40,7 @@ class RParisEasyI2IRetrieval(AbsTaskAny2AnyRetrieval): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 6322, + "num_documents": 1470, "num_queries": 70, "average_relevant_docs_per_query": 98.2, } @@ -57,7 +57,7 @@ class 
RParisMediumI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Paris_and_CVPR_2018_paper.html", dataset={ "path": "JamieSJS/r-paris-medium", - "revision": "900267b49003a086979e8d52f6942624236bfc34", + "revision": "3d959815e102785efd628170281f1e65561b03d2", }, type="Any2AnyRetrieval", category="i2i", @@ -86,7 +86,7 @@ class RParisMediumI2IRetrieval(AbsTaskAny2AnyRetrieval): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 6322, + "num_documents": 2651, "num_queries": 70, "average_relevant_docs_per_query": 147.9, } @@ -103,7 +103,7 @@ class RParisHardI2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://openaccess.thecvf.com/content_cvpr_2018/html/Radenovic_Revisiting_Paris_and_CVPR_2018_paper.html", dataset={ "path": "JamieSJS/r-paris-hard", - "revision": "fd121b6592fe946616fa85116703b94a4c61fd63", + "revision": "d3e0adf4e942446c04427511ccce281c86861248", }, type="Any2AnyRetrieval", category="i2i", diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordEasyI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordEasyI2IRetrieval.json index 319e3d389b..738473a5e4 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordEasyI2IRetrieval.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordEasyI2IRetrieval.json @@ -1,184 +1,184 @@ { - "dataset_revision": "3f018eb7ad32218a5a4ebd704493e0834a265cf5", - "evaluation_time": 17.977893829345703, + "dataset_revision": "b71b5f67a93aa63761b79a67bcf28bd2ae590902", + "evaluation_time": 18.838356256484985, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "cv_recall_at_1": 0.73529, - "cv_recall_at_10": 0.88235, - "cv_recall_at_100": 0.95588, + "cv_recall_at_1": 0.88235, + "cv_recall_at_10": 
0.97059, + "cv_recall_at_100": 1.0, "cv_recall_at_1000": 1.0, - "cv_recall_at_20": 0.89706, - "cv_recall_at_3": 0.79412, - "cv_recall_at_5": 0.82353, + "cv_recall_at_20": 0.98529, + "cv_recall_at_3": 0.91176, + "cv_recall_at_5": 0.94118, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.73529, - "map_at_1": 0.12315, - "map_at_10": 0.25156, - "map_at_100": 0.39829, - "map_at_1000": 0.46268, - "map_at_20": 0.30547, - "map_at_3": 0.16397, - "map_at_5": 0.20244, - "mrr_at_1": 0.7352941176470589, - "mrr_at_10": 0.7796977124183007, - "mrr_at_100": 0.782038983404294, - "mrr_at_1000": 0.7822887502278202, - "mrr_at_20": 0.7806168300653595, - "mrr_at_3": 0.7647058823529411, - "mrr_at_5": 0.7713235294117647, + "main_score": 0.88235, + "map_at_1": 0.18698, + "map_at_10": 0.44691, + "map_at_100": 0.66159, + "map_at_1000": 0.73348, + "map_at_20": 0.53479, + "map_at_3": 0.30295, + "map_at_5": 0.36197, + "mrr_at_1": 0.8823529411764706, + "mrr_at_10": 0.908700980392157, + "mrr_at_100": 0.9098240302312877, + "mrr_at_1000": 0.9098240302312877, + "mrr_at_20": 0.9095660322952712, + "mrr_at_3": 0.8970588235294118, + "mrr_at_5": 0.9044117647058824, "nauc_cv_recall_at_1000_diff1": NaN, "nauc_cv_recall_at_1000_max": NaN, "nauc_cv_recall_at_1000_std": NaN, - "nauc_cv_recall_at_100_diff1": 0.43205924601836393, - "nauc_cv_recall_at_100_max": 0.46045430774676144, - "nauc_cv_recall_at_100_std": 0.092928936942703, - "nauc_cv_recall_at_10_diff1": 0.5238482274120124, - "nauc_cv_recall_at_10_max": 0.5447795999610322, - "nauc_cv_recall_at_10_std": -0.06773626862669024, - "nauc_cv_recall_at_1_diff1": 0.12227878218725667, - "nauc_cv_recall_at_1_max": 0.6674736854281768, - "nauc_cv_recall_at_1_std": 0.14078948712021733, - "nauc_cv_recall_at_20_diff1": 0.5375955313486179, - "nauc_cv_recall_at_20_max": 0.46954147068219054, - "nauc_cv_recall_at_20_std": -0.09380403989753433, - "nauc_cv_recall_at_3_diff1": 0.032732730804488214, - "nauc_cv_recall_at_3_max": 0.6199038499058639, - 
"nauc_cv_recall_at_3_std": 0.1529241887407745, - "nauc_cv_recall_at_5_diff1": 0.05658962411663845, - "nauc_cv_recall_at_5_max": 0.5877996333799903, - "nauc_cv_recall_at_5_std": 0.07730142560650717, - "nauc_map_at_1000_diff1": -0.11043090289526158, - "nauc_map_at_1000_max": 0.3448001394797035, - "nauc_map_at_1000_std": 0.27617723792125437, - "nauc_map_at_100_diff1": -0.01003127759400255, - "nauc_map_at_100_max": 0.1251706076455483, - "nauc_map_at_100_std": 0.03701229955981713, - "nauc_map_at_10_diff1": 0.2139884419568207, - "nauc_map_at_10_max": -0.26177042855989124, - "nauc_map_at_10_std": -0.26537536804238954, - "nauc_map_at_1_diff1": 0.21399681007835022, - "nauc_map_at_1_max": -0.3465990811601411, - "nauc_map_at_1_std": -0.29206561782006085, - "nauc_map_at_20_diff1": 0.1442752699133972, - "nauc_map_at_20_max": -0.15930665035054287, - "nauc_map_at_20_std": -0.21656927646466867, - "nauc_map_at_3_diff1": 0.23213875592486613, - "nauc_map_at_3_max": -0.3123546266529488, - "nauc_map_at_3_std": -0.28815041773010014, - "nauc_map_at_5_diff1": 0.2555448109113392, - "nauc_map_at_5_max": -0.29279690087015, - "nauc_map_at_5_std": -0.2774467752271305, - "nauc_mrr_at_1000_diff1": 0.12342303172342535, - "nauc_mrr_at_1000_max": 0.6362159972261828, - "nauc_mrr_at_1000_std": 0.12097023869090642, - "nauc_mrr_at_100_diff1": 0.12414954187540776, - "nauc_mrr_at_100_max": 0.6357802506977609, - "nauc_mrr_at_100_std": 0.12039621076657632, - "nauc_mrr_at_10_diff1": 0.12909230332110294, - "nauc_mrr_at_10_max": 0.6368549836172475, - "nauc_mrr_at_10_std": 0.11895636170173347, - "nauc_mrr_at_1_diff1": 0.12227878218725667, - "nauc_mrr_at_1_max": 0.6674736854281768, - "nauc_mrr_at_1_std": 0.14078948712021733, - "nauc_mrr_at_20_diff1": 0.1279569336937743, - "nauc_mrr_at_20_max": 0.6347997662966113, - "nauc_mrr_at_20_std": 0.11886311296272292, - "nauc_mrr_at_3_diff1": 0.0819493669888214, - "nauc_mrr_at_3_max": 0.6460493610936507, - "nauc_mrr_at_3_std": 0.14625466858168346, - "nauc_mrr_at_5_diff1": 
0.08926776001340989, - "nauc_mrr_at_5_max": 0.6409743370271209, - "nauc_mrr_at_5_std": 0.13086158199344464, - "nauc_ndcg_at_1000_diff1": 0.0019895005695910535, - "nauc_ndcg_at_1000_max": 0.45479785106891296, - "nauc_ndcg_at_1000_std": 0.24337684988960884, - "nauc_ndcg_at_100_diff1": -0.09459953169491665, - "nauc_ndcg_at_100_max": 0.3603949417192614, - "nauc_ndcg_at_100_std": 0.25977974826865124, - "nauc_ndcg_at_10_diff1": -0.13743096053058393, - "nauc_ndcg_at_10_max": 0.5086455404107587, - "nauc_ndcg_at_10_std": 0.2942246989065215, - "nauc_ndcg_at_1_diff1": 0.12227878218725667, - "nauc_ndcg_at_1_max": 0.6674736854281768, - "nauc_ndcg_at_1_std": 0.14078948712021733, - "nauc_ndcg_at_20_diff1": -0.12980804316788766, - "nauc_ndcg_at_20_max": 0.4455786946909776, - "nauc_ndcg_at_20_std": 0.31146671038942925, - "nauc_ndcg_at_3_diff1": -0.12622874187697183, - "nauc_ndcg_at_3_max": 0.6321053376737653, - "nauc_ndcg_at_3_std": 0.2970450461706155, - "nauc_ndcg_at_5_diff1": -0.1167444253887786, - "nauc_ndcg_at_5_max": 0.5830509606014207, - "nauc_ndcg_at_5_std": 0.3092017793979728, - "nauc_precision_at_1000_diff1": -0.20870904079910094, - "nauc_precision_at_1000_max": 0.553838513916762, - "nauc_precision_at_1000_std": 0.5530198613029336, - "nauc_precision_at_100_diff1": -0.35505246743099134, - "nauc_precision_at_100_max": 0.7356593773083951, - "nauc_precision_at_100_std": 0.6881226249275548, - "nauc_precision_at_10_diff1": -0.32094129231704527, - "nauc_precision_at_10_max": 0.6957334058235605, - "nauc_precision_at_10_std": 0.4986922302670923, - "nauc_precision_at_1_diff1": 0.12227878218725667, - "nauc_precision_at_1_max": 0.6674736854281768, - "nauc_precision_at_1_std": 0.14078948712021733, - "nauc_precision_at_20_diff1": -0.348557232862726, - "nauc_precision_at_20_max": 0.7349093491157456, - "nauc_precision_at_20_std": 0.5403176104598045, - "nauc_precision_at_3_diff1": -0.19598105078336095, - "nauc_precision_at_3_max": 0.6900728385079788, - "nauc_precision_at_3_std": 
0.38284792684568514, - "nauc_precision_at_5_diff1": -0.24325694519449578, - "nauc_precision_at_5_max": 0.6894112316594818, - "nauc_precision_at_5_std": 0.43885055756284386, - "nauc_recall_at_1000_diff1": 0.1247114496786093, - "nauc_recall_at_1000_max": 0.1972100012754324, - "nauc_recall_at_1000_std": 0.45358985850344147, - "nauc_recall_at_100_diff1": -0.007219441799189107, - "nauc_recall_at_100_max": -0.2726450323976995, - "nauc_recall_at_100_std": -0.09774220148121496, - "nauc_recall_at_10_diff1": 0.2493200864593003, - "nauc_recall_at_10_max": -0.35624261159143006, - "nauc_recall_at_10_std": -0.3178591018768707, - "nauc_recall_at_1_diff1": 0.21399681007835022, - "nauc_recall_at_1_max": -0.3465990811601411, - "nauc_recall_at_1_std": -0.29206561782006085, - "nauc_recall_at_20_diff1": 0.17625014384578389, - "nauc_recall_at_20_max": -0.3372766383490987, - "nauc_recall_at_20_std": -0.30264233629183895, - "nauc_recall_at_3_diff1": 0.23996176062420918, - "nauc_recall_at_3_max": -0.3204556669767577, - "nauc_recall_at_3_std": -0.292832765895384, - "nauc_recall_at_5_diff1": 0.28900424501627087, - "nauc_recall_at_5_max": -0.31760291427784704, - "nauc_recall_at_5_std": -0.2855625586155051, - "ndcg_at_1": 0.73529, - "ndcg_at_10": 0.60178, - "ndcg_at_100": 0.61347, - "ndcg_at_1000": 0.69898, - "ndcg_at_20": 0.58929, - "ndcg_at_3": 0.62141, - "ndcg_at_5": 0.61571, - "precision_at_1": 0.73529, - "precision_at_10": 0.45, - "precision_at_100": 0.19691, - "precision_at_1000": 0.04228, - "precision_at_20": 0.37647, - "precision_at_3": 0.54412, - "precision_at_5": 0.51176, - "recall_at_1": 0.12315, - "recall_at_10": 0.32137, - "recall_at_100": 0.68206, - "recall_at_1000": 0.93425, - "recall_at_20": 0.43253, - "recall_at_3": 0.16748, - "recall_at_5": 0.22328 + "nauc_cv_recall_at_100_diff1": NaN, + "nauc_cv_recall_at_100_max": NaN, + "nauc_cv_recall_at_100_std": NaN, + "nauc_cv_recall_at_10_diff1": 0.45647666859182273, + "nauc_cv_recall_at_10_max": 0.7947783849423156, + 
"nauc_cv_recall_at_10_std": 0.07401479495347633, + "nauc_cv_recall_at_1_diff1": 0.15992222589126076, + "nauc_cv_recall_at_1_max": 0.7066438838407989, + "nauc_cv_recall_at_1_std": 0.34516803717873823, + "nauc_cv_recall_at_20_diff1": 0.5540376442015769, + "nauc_cv_recall_at_20_max": 0.8688524590163946, + "nauc_cv_recall_at_20_std": 0.7207043108682435, + "nauc_cv_recall_at_3_diff1": 0.02979739943425187, + "nauc_cv_recall_at_3_max": 0.6677398031346646, + "nauc_cv_recall_at_3_std": 0.19367311898769965, + "nauc_cv_recall_at_5_diff1": 0.33506965405558836, + "nauc_cv_recall_at_5_max": 0.8646023072252566, + "nauc_cv_recall_at_5_std": 0.3439494354763536, + "nauc_map_at_1000_diff1": -0.0748418292398556, + "nauc_map_at_1000_max": 0.1642828370088438, + "nauc_map_at_1000_std": 0.05154074134715094, + "nauc_map_at_100_diff1": 0.18300297522101533, + "nauc_map_at_100_max": -0.09630611530306471, + "nauc_map_at_100_std": -0.27588270039938373, + "nauc_map_at_10_diff1": 0.3759801235280281, + "nauc_map_at_10_max": -0.4332228813048274, + "nauc_map_at_10_std": -0.513562318981569, + "nauc_map_at_1_diff1": 0.3294708101134731, + "nauc_map_at_1_max": -0.42544232201495896, + "nauc_map_at_1_std": -0.5038005227813385, + "nauc_map_at_20_diff1": 0.32605775541629023, + "nauc_map_at_20_max": -0.3650620149830886, + "nauc_map_at_20_std": -0.48968947212775454, + "nauc_map_at_3_diff1": 0.3734075945845939, + "nauc_map_at_3_max": -0.46014380098464513, + "nauc_map_at_3_std": -0.5246559323671462, + "nauc_map_at_5_diff1": 0.3703663478894585, + "nauc_map_at_5_max": -0.45530691941081636, + "nauc_map_at_5_std": -0.5435081911059385, + "nauc_mrr_at_1000_diff1": 0.1563665959264749, + "nauc_mrr_at_1000_max": 0.7110551871683163, + "nauc_mrr_at_1000_std": 0.3006510045832456, + "nauc_mrr_at_100_diff1": 0.1563665959264749, + "nauc_mrr_at_100_max": 0.7110551871683163, + "nauc_mrr_at_100_std": 0.3006510045832456, + "nauc_mrr_at_10_diff1": 0.15944197884390138, + "nauc_mrr_at_10_max": 0.7115982604675308, + 
"nauc_mrr_at_10_std": 0.2934876531933919, + "nauc_mrr_at_1_diff1": 0.15992222589126076, + "nauc_mrr_at_1_max": 0.7066438838407989, + "nauc_mrr_at_1_std": 0.34516803717873823, + "nauc_mrr_at_20_diff1": 0.15751333465270692, + "nauc_mrr_at_20_max": 0.7115102171316658, + "nauc_mrr_at_20_std": 0.3018622856043013, + "nauc_mrr_at_3_diff1": 0.10362708820418606, + "nauc_mrr_at_3_max": 0.6898130404147407, + "nauc_mrr_at_3_std": 0.2796276825003452, + "nauc_mrr_at_5_diff1": 0.15680775836822267, + "nauc_mrr_at_5_max": 0.7221234773231954, + "nauc_mrr_at_5_std": 0.30966199967981883, + "nauc_ndcg_at_1000_diff1": -0.08504950934995431, + "nauc_ndcg_at_1000_max": 0.3562828960467138, + "nauc_ndcg_at_1000_std": 0.12738028206826132, + "nauc_ndcg_at_100_diff1": 0.042015479832883486, + "nauc_ndcg_at_100_max": 0.267061759431482, + "nauc_ndcg_at_100_std": 0.07290512182104139, + "nauc_ndcg_at_10_diff1": -0.12122120870754685, + "nauc_ndcg_at_10_max": 0.48196302566814775, + "nauc_ndcg_at_10_std": 0.20073536709572107, + "nauc_ndcg_at_1_diff1": 0.15992222589126076, + "nauc_ndcg_at_1_max": 0.7066438838407989, + "nauc_ndcg_at_1_std": 0.34516803717873823, + "nauc_ndcg_at_20_diff1": -0.10661851294694442, + "nauc_ndcg_at_20_max": 0.33981330679892496, + "nauc_ndcg_at_20_std": 0.20675699745130993, + "nauc_ndcg_at_3_diff1": -0.06822895395133349, + "nauc_ndcg_at_3_max": 0.5801948973031311, + "nauc_ndcg_at_3_std": 0.2644877411998497, + "nauc_ndcg_at_5_diff1": -0.08300096492046799, + "nauc_ndcg_at_5_max": 0.5620424257737597, + "nauc_ndcg_at_5_std": 0.25135047052009385, + "nauc_precision_at_1000_diff1": -0.5039877076914758, + "nauc_precision_at_1000_max": 0.5273281460558504, + "nauc_precision_at_1000_std": 0.6496446422377248, + "nauc_precision_at_100_diff1": -0.4796643779471086, + "nauc_precision_at_100_max": 0.6151414562917783, + "nauc_precision_at_100_std": 0.6678831725250701, + "nauc_precision_at_10_diff1": -0.42975726364512384, + "nauc_precision_at_10_max": 0.7033828091076564, + 
"nauc_precision_at_10_std": 0.6296497019716161, + "nauc_precision_at_1_diff1": 0.15992222589126076, + "nauc_precision_at_1_max": 0.7066438838407989, + "nauc_precision_at_1_std": 0.34516803717873823, + "nauc_precision_at_20_diff1": -0.4946390997396103, + "nauc_precision_at_20_max": 0.6858303123733283, + "nauc_precision_at_20_std": 0.6298812155793545, + "nauc_precision_at_3_diff1": -0.24876059895537342, + "nauc_precision_at_3_max": 0.6456537706466767, + "nauc_precision_at_3_std": 0.5045016679013927, + "nauc_precision_at_5_diff1": -0.32421116538126843, + "nauc_precision_at_5_max": 0.7016255386813988, + "nauc_precision_at_5_std": 0.5649338219757292, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": 0.5157172488761371, + "nauc_recall_at_100_max": -0.2912343469081552, + "nauc_recall_at_100_std": -0.45545038271617866, + "nauc_recall_at_10_diff1": 0.430025555269766, + "nauc_recall_at_10_max": -0.49271570163117323, + "nauc_recall_at_10_std": -0.5498160938181346, + "nauc_recall_at_1_diff1": 0.3294708101134731, + "nauc_recall_at_1_max": -0.42544232201495896, + "nauc_recall_at_1_std": -0.5038005227813385, + "nauc_recall_at_20_diff1": 0.4176823227802248, + "nauc_recall_at_20_max": -0.5026073945178894, + "nauc_recall_at_20_std": -0.5407548592660978, + "nauc_recall_at_3_diff1": 0.36561954570571, + "nauc_recall_at_3_max": -0.4642250997167962, + "nauc_recall_at_3_std": -0.5342346748455666, + "nauc_recall_at_5_diff1": 0.3529315499791077, + "nauc_recall_at_5_max": -0.47879447841104084, + "nauc_recall_at_5_std": -0.5554339290951641, + "ndcg_at_1": 0.88235, + "ndcg_at_10": 0.83499, + "ndcg_at_100": 0.82462, + "ndcg_at_1000": 0.87986, + "ndcg_at_20": 0.82108, + "ndcg_at_3": 0.85596, + "ndcg_at_5": 0.84619, + "precision_at_1": 0.88235, + "precision_at_10": 0.61176, + "precision_at_100": 0.24838, + "precision_at_1000": 0.04454, + "precision_at_20": 0.50882, + "precision_at_3": 0.76471, + "precision_at_5": 
0.69412, + "recall_at_1": 0.18698, + "recall_at_10": 0.48422, + "recall_at_100": 0.80718, + "recall_at_1000": 1.0, + "recall_at_20": 0.60501, + "recall_at_3": 0.30707, + "recall_at_5": 0.3807 } ] }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordHardI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordHardI2IRetrieval.json index f957d7315e..a944a1a301 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordHardI2IRetrieval.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordHardI2IRetrieval.json @@ -1,184 +1,184 @@ { - "dataset_revision": "f20b30211b7ba3fc64a02bd83998fe75f3023719", - "evaluation_time": 17.18506622314453, + "dataset_revision": "f71ab9d4aabcda93d55a7e65edfb3a34767d89e6", + "evaluation_time": 24.67104411125183, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "cv_recall_at_1": 0.04286, - "cv_recall_at_10": 0.44286, - "cv_recall_at_100": 0.77143, - "cv_recall_at_1000": 0.92857, - "cv_recall_at_20": 0.52857, - "cv_recall_at_3": 0.18571, - "cv_recall_at_5": 0.27143, + "cv_recall_at_1": 0.27143, + "cv_recall_at_10": 0.61429, + "cv_recall_at_100": 0.85714, + "cv_recall_at_1000": 1.0, + "cv_recall_at_20": 0.74286, + "cv_recall_at_3": 0.42857, + "cv_recall_at_5": 0.57143, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.04286, - "map_at_1": 0.00178, - "map_at_10": 0.02316, - "map_at_100": 0.03694, - "map_at_1000": 0.05036, - "map_at_20": 0.02814, - "map_at_3": 0.01285, - "map_at_5": 0.01752, - "mrr_at_1": 0.04285714285714286, - "mrr_at_10": 0.13929705215419497, - "mrr_at_100": 0.15254576393638014, - "mrr_at_1000": 0.15348328701607364, - "mrr_at_20": 0.14564255189255187, - "mrr_at_3": 0.1, - "mrr_at_5": 0.11857142857142858, - "nauc_cv_recall_at_1000_diff1": 0.47861811391223064, - 
"nauc_cv_recall_at_1000_max": 0.5808590102707742, - "nauc_cv_recall_at_1000_std": 0.09140989729224985, - "nauc_cv_recall_at_100_diff1": 0.5586401902191371, - "nauc_cv_recall_at_100_max": -0.12197159565580537, - "nauc_cv_recall_at_100_std": -0.46324143692564745, - "nauc_cv_recall_at_10_diff1": 0.33702203892735166, - "nauc_cv_recall_at_10_max": -0.2566107451434116, - "nauc_cv_recall_at_10_std": -0.2562935040214977, - "nauc_cv_recall_at_1_diff1": 0.5001341561577676, - "nauc_cv_recall_at_1_max": -0.12592791342455964, - "nauc_cv_recall_at_1_std": 0.13701815580001792, - "nauc_cv_recall_at_20_diff1": 0.33340522734848327, - "nauc_cv_recall_at_20_max": -0.27021038803160713, - "nauc_cv_recall_at_20_std": -0.2604850885276762, - "nauc_cv_recall_at_3_diff1": 0.42356508767210377, - "nauc_cv_recall_at_3_max": -0.30471252842923874, - "nauc_cv_recall_at_3_std": -0.2722848548580372, - "nauc_cv_recall_at_5_diff1": 0.37677403680683497, - "nauc_cv_recall_at_5_max": -0.30530641993804797, - "nauc_cv_recall_at_5_std": -0.2763347032980382, - "nauc_map_at_1000_diff1": 0.32492788395298233, - "nauc_map_at_1000_max": -0.264513294885872, - "nauc_map_at_1000_std": -0.06025413474178467, - "nauc_map_at_100_diff1": 0.33932455574204345, - "nauc_map_at_100_max": -0.3183944729391321, - "nauc_map_at_100_std": -0.11067793875024108, - "nauc_map_at_10_diff1": 0.3627322367418886, - "nauc_map_at_10_max": -0.270787009945954, - "nauc_map_at_10_std": -0.12449041958590679, - "nauc_map_at_1_diff1": 0.8349360037495761, - "nauc_map_at_1_max": 0.04030601005296298, - "nauc_map_at_1_std": 0.28342764489523986, - "nauc_map_at_20_diff1": 0.36471735689250456, - "nauc_map_at_20_max": -0.27443485632176895, - "nauc_map_at_20_std": -0.10351956211548237, - "nauc_map_at_3_diff1": 0.33740985231621823, - "nauc_map_at_3_max": -0.26557891525076527, - "nauc_map_at_3_std": -0.13603337759846046, - "nauc_map_at_5_diff1": 0.3822623605794258, - "nauc_map_at_5_max": -0.26535510732907475, - "nauc_map_at_5_std": -0.15188815608303466, - 
"nauc_mrr_at_1000_diff1": 0.42803033998914947, - "nauc_mrr_at_1000_max": -0.25330248029839103, - "nauc_mrr_at_1000_std": -0.1498777504287341, - "nauc_mrr_at_100_diff1": 0.428756525260202, - "nauc_mrr_at_100_max": -0.255106401697983, - "nauc_mrr_at_100_std": -0.15247667584774333, - "nauc_mrr_at_10_diff1": 0.4269277134986295, - "nauc_mrr_at_10_max": -0.26024547551332144, - "nauc_mrr_at_10_std": -0.15027887241014917, - "nauc_mrr_at_1_diff1": 0.5001341561577676, - "nauc_mrr_at_1_max": -0.12592791342455964, - "nauc_mrr_at_1_std": 0.13701815580001792, - "nauc_mrr_at_20_diff1": 0.4273548864254061, - "nauc_mrr_at_20_max": -0.2649857851418372, - "nauc_mrr_at_20_std": -0.150811901899745, - "nauc_mrr_at_3_diff1": 0.46234460520174797, - "nauc_mrr_at_3_max": -0.26846705418134004, - "nauc_mrr_at_3_std": -0.14951136379707816, - "nauc_mrr_at_5_diff1": 0.4315603993226756, - "nauc_mrr_at_5_max": -0.2751708043421038, - "nauc_mrr_at_5_std": -0.15676414005890574, - "nauc_ndcg_at_1000_diff1": 0.3031539661086623, - "nauc_ndcg_at_1000_max": -0.06324949257415059, - "nauc_ndcg_at_1000_std": 0.03291344536352667, - "nauc_ndcg_at_100_diff1": 0.3537878100611368, - "nauc_ndcg_at_100_max": -0.3440256642252378, - "nauc_ndcg_at_100_std": -0.13928638317122793, - "nauc_ndcg_at_10_diff1": 0.31423572757944224, - "nauc_ndcg_at_10_max": -0.27799943432453955, - "nauc_ndcg_at_10_std": -0.12000726018713354, - "nauc_ndcg_at_1_diff1": 0.5001341561577676, - "nauc_ndcg_at_1_max": -0.12592791342455964, - "nauc_ndcg_at_1_std": 0.13701815580001792, - "nauc_ndcg_at_20_diff1": 0.3527561180413498, - "nauc_ndcg_at_20_max": -0.29516361937414604, - "nauc_ndcg_at_20_std": -0.10951825882002067, - "nauc_ndcg_at_3_diff1": 0.3265974284434805, - "nauc_ndcg_at_3_max": -0.27320998825572146, - "nauc_ndcg_at_3_std": -0.12237689567430368, - "nauc_ndcg_at_5_diff1": 0.32240119227390085, - "nauc_ndcg_at_5_max": -0.30077766985994786, - "nauc_ndcg_at_5_std": -0.15147497522509035, - "nauc_precision_at_1000_diff1": -0.12639282009702774, 
- "nauc_precision_at_1000_max": 0.22826737103796205, - "nauc_precision_at_1000_std": 0.32628514757600596, - "nauc_precision_at_100_diff1": 0.2772154726137188, - "nauc_precision_at_100_max": -0.23028476826992986, - "nauc_precision_at_100_std": -0.1303180540104937, - "nauc_precision_at_10_diff1": 0.29107138713345343, - "nauc_precision_at_10_max": -0.30600784337992426, - "nauc_precision_at_10_std": -0.12021874830794935, - "nauc_precision_at_1_diff1": 0.5001341561577676, - "nauc_precision_at_1_max": -0.12592791342455964, - "nauc_precision_at_1_std": 0.13701815580001792, - "nauc_precision_at_20_diff1": 0.27727122281419164, - "nauc_precision_at_20_max": -0.3127822679663893, - "nauc_precision_at_20_std": -0.07951548670204237, - "nauc_precision_at_3_diff1": 0.3064711218627153, - "nauc_precision_at_3_max": -0.29553472432214506, - "nauc_precision_at_3_std": -0.17296643483519816, - "nauc_precision_at_5_diff1": 0.27289717372431765, - "nauc_precision_at_5_max": -0.33688627375325547, - "nauc_precision_at_5_std": -0.1943667406192727, - "nauc_recall_at_1000_diff1": 0.2612937021120264, - "nauc_recall_at_1000_max": -0.04177364682142833, - "nauc_recall_at_1000_std": -0.10225779216767208, - "nauc_recall_at_100_diff1": 0.23045925470740378, - "nauc_recall_at_100_max": -0.32266175361589855, - "nauc_recall_at_100_std": -0.10122639227831537, - "nauc_recall_at_10_diff1": 0.3214491777002914, - "nauc_recall_at_10_max": -0.2191819309348708, - "nauc_recall_at_10_std": -0.13294514318629588, - "nauc_recall_at_1_diff1": 0.8349360037495761, - "nauc_recall_at_1_max": 0.04030601005296298, - "nauc_recall_at_1_std": 0.28342764489523986, - "nauc_recall_at_20_diff1": 0.30304432643409973, - "nauc_recall_at_20_max": -0.2369864752715589, - "nauc_recall_at_20_std": -0.11319354561879752, - "nauc_recall_at_3_diff1": 0.2610862099372524, - "nauc_recall_at_3_max": -0.26808957441898906, - "nauc_recall_at_3_std": -0.1754514836876096, - "nauc_recall_at_5_diff1": 0.3659777743786138, - "nauc_recall_at_5_max": 
-0.23557478979416202, - "nauc_recall_at_5_std": -0.1749293090721626, - "ndcg_at_1": 0.04286, - "ndcg_at_10": 0.08519, - "ndcg_at_100": 0.12707, - "ndcg_at_1000": 0.26609, - "ndcg_at_20": 0.08785, - "ndcg_at_3": 0.07857, - "ndcg_at_5": 0.08201, - "precision_at_1": 0.04286, - "precision_at_10": 0.07571, - "precision_at_100": 0.03571, - "precision_at_1000": 0.01857, - "precision_at_20": 0.05714, - "precision_at_3": 0.08571, - "precision_at_5": 0.08, - "recall_at_1": 0.00178, - "recall_at_10": 0.07379, - "recall_at_100": 0.22471, - "recall_at_1000": 0.60888, - "recall_at_20": 0.09447, - "recall_at_3": 0.02931, - "recall_at_5": 0.04839 + "main_score": 0.27143, + "map_at_1": 0.06428, + "map_at_10": 0.10339, + "map_at_100": 0.1463, + "map_at_1000": 0.18944, + "map_at_20": 0.11828, + "map_at_3": 0.07918, + "map_at_5": 0.09165, + "mrr_at_1": 0.2714285714285714, + "mrr_at_10": 0.3802551020408163, + "mrr_at_100": 0.3931760616047423, + "mrr_at_1000": 0.39374600845359353, + "mrr_at_20": 0.38933244509925174, + "mrr_at_3": 0.34047619047619043, + "mrr_at_5": 0.374047619047619, + "nauc_cv_recall_at_1000_diff1": NaN, + "nauc_cv_recall_at_1000_max": NaN, + "nauc_cv_recall_at_1000_std": NaN, + "nauc_cv_recall_at_100_diff1": 0.3957240038872689, + "nauc_cv_recall_at_100_max": -0.45918367346938554, + "nauc_cv_recall_at_100_std": -0.7625364431486866, + "nauc_cv_recall_at_10_diff1": 0.3501381065268928, + "nauc_cv_recall_at_10_max": -0.3230170295134737, + "nauc_cv_recall_at_10_std": -0.28669610039365817, + "nauc_cv_recall_at_1_diff1": 0.31133966351507303, + "nauc_cv_recall_at_1_max": -0.36652967019618166, + "nauc_cv_recall_at_1_std": -0.39039944931468024, + "nauc_cv_recall_at_20_diff1": 0.48971241524433046, + "nauc_cv_recall_at_20_max": -0.35202829085807796, + "nauc_cv_recall_at_20_std": -0.4874327799859716, + "nauc_cv_recall_at_3_diff1": 0.20616527390900677, + "nauc_cv_recall_at_3_max": -0.3163788300835654, + "nauc_cv_recall_at_3_std": -0.34913649025069615, + "nauc_cv_recall_at_5_diff1": 
0.24162217659137575, + "nauc_cv_recall_at_5_max": -0.40519507186858295, + "nauc_cv_recall_at_5_std": -0.349342915811088, + "nauc_map_at_1000_diff1": 0.33483050942379805, + "nauc_map_at_1000_max": -0.3354071059234048, + "nauc_map_at_1000_std": -0.325502994839205, + "nauc_map_at_100_diff1": 0.36421675112335855, + "nauc_map_at_100_max": -0.37056372968494616, + "nauc_map_at_100_std": -0.39098657201855935, + "nauc_map_at_10_diff1": 0.43724421619965775, + "nauc_map_at_10_max": -0.3475499176287484, + "nauc_map_at_10_std": -0.3087709853113486, + "nauc_map_at_1_diff1": 0.5328003645974971, + "nauc_map_at_1_max": -0.30820864556110844, + "nauc_map_at_1_std": -0.2525466251963858, + "nauc_map_at_20_diff1": 0.40424948920352965, + "nauc_map_at_20_max": -0.36561679338161, + "nauc_map_at_20_std": -0.34598603785503834, + "nauc_map_at_3_diff1": 0.47569665621843843, + "nauc_map_at_3_max": -0.3350766680518147, + "nauc_map_at_3_std": -0.29588688611880093, + "nauc_map_at_5_diff1": 0.44497494504359997, + "nauc_map_at_5_max": -0.35371869827402724, + "nauc_map_at_5_std": -0.2890820614425645, + "nauc_mrr_at_1000_diff1": 0.2682845806139734, + "nauc_mrr_at_1000_max": -0.36456025639429424, + "nauc_mrr_at_1000_std": -0.37575466392348833, + "nauc_mrr_at_100_diff1": 0.26897531827173626, + "nauc_mrr_at_100_max": -0.3659176034789706, + "nauc_mrr_at_100_std": -0.37740763588526954, + "nauc_mrr_at_10_diff1": 0.2686668033979706, + "nauc_mrr_at_10_max": -0.3628633448193816, + "nauc_mrr_at_10_std": -0.36335706237767257, + "nauc_mrr_at_1_diff1": 0.31133966351507303, + "nauc_mrr_at_1_max": -0.36652967019618166, + "nauc_mrr_at_1_std": -0.39039944931468024, + "nauc_mrr_at_20_diff1": 0.2731473760001198, + "nauc_mrr_at_20_max": -0.36635207234744227, + "nauc_mrr_at_20_std": -0.3759359230442535, + "nauc_mrr_at_3_diff1": 0.24452621954568515, + "nauc_mrr_at_3_max": -0.35272889400073, + "nauc_mrr_at_3_std": -0.3683045738415161, + "nauc_mrr_at_5_diff1": 0.25344916447455645, + "nauc_mrr_at_5_max": -0.3740602466954703, 
+ "nauc_mrr_at_5_std": -0.3711781326955965, + "nauc_ndcg_at_1000_diff1": 0.15289382130510762, + "nauc_ndcg_at_1000_max": -0.11235365434908824, + "nauc_ndcg_at_1000_std": -0.16901273056125712, + "nauc_ndcg_at_100_diff1": 0.33546453963254314, + "nauc_ndcg_at_100_max": -0.39506763446494053, + "nauc_ndcg_at_100_std": -0.47504197799159326, + "nauc_ndcg_at_10_diff1": 0.2602337772209344, + "nauc_ndcg_at_10_max": -0.35707200508700326, + "nauc_ndcg_at_10_std": -0.32627013894830836, + "nauc_ndcg_at_1_diff1": 0.31133966351507303, + "nauc_ndcg_at_1_max": -0.36652967019618166, + "nauc_ndcg_at_1_std": -0.39039944931468024, + "nauc_ndcg_at_20_diff1": 0.2951622910865746, + "nauc_ndcg_at_20_max": -0.4075808329991618, + "nauc_ndcg_at_20_std": -0.4033601223158598, + "nauc_ndcg_at_3_diff1": 0.2583369568946569, + "nauc_ndcg_at_3_max": -0.36460761087963334, + "nauc_ndcg_at_3_std": -0.37460677189461866, + "nauc_ndcg_at_5_diff1": 0.24895791797858352, + "nauc_ndcg_at_5_max": -0.38343316697513324, + "nauc_ndcg_at_5_std": -0.35014891765983175, + "nauc_precision_at_1000_diff1": -0.15221736880156744, + "nauc_precision_at_1000_max": 0.24012671254256857, + "nauc_precision_at_1000_std": 0.31990023300446896, + "nauc_precision_at_100_diff1": -0.0025063624250149666, + "nauc_precision_at_100_max": -0.23039752243674347, + "nauc_precision_at_100_std": -0.2687557194001006, + "nauc_precision_at_10_diff1": 0.04266939397452653, + "nauc_precision_at_10_max": -0.2921849379730692, + "nauc_precision_at_10_std": -0.32849429418487175, + "nauc_precision_at_1_diff1": 0.31133966351507303, + "nauc_precision_at_1_max": -0.36652967019618166, + "nauc_precision_at_1_std": -0.39039944931468024, + "nauc_precision_at_20_diff1": -0.012177956618922228, + "nauc_precision_at_20_max": -0.3166595426664091, + "nauc_precision_at_20_std": -0.38852936131749555, + "nauc_precision_at_3_diff1": 0.151720996554046, + "nauc_precision_at_3_max": -0.3436744959797202, + "nauc_precision_at_3_std": -0.37392561492454535, + 
"nauc_precision_at_5_diff1": 0.09040511727078884, + "nauc_precision_at_5_max": -0.35160602517367096, + "nauc_precision_at_5_std": -0.35003095123461053, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": 0.26559603138781285, + "nauc_recall_at_100_max": -0.3073116185520707, + "nauc_recall_at_100_std": -0.449657488606374, + "nauc_recall_at_10_diff1": 0.38683534388556096, + "nauc_recall_at_10_max": -0.3358496548040101, + "nauc_recall_at_10_std": -0.2154853289653661, + "nauc_recall_at_1_diff1": 0.5328003645974971, + "nauc_recall_at_1_max": -0.30820864556110844, + "nauc_recall_at_1_std": -0.2525466251963858, + "nauc_recall_at_20_diff1": 0.30135439280777626, + "nauc_recall_at_20_max": -0.33860982229647496, + "nauc_recall_at_20_std": -0.2973578871821516, + "nauc_recall_at_3_diff1": 0.4514271986825242, + "nauc_recall_at_3_max": -0.33100045995309413, + "nauc_recall_at_3_std": -0.2898936824323878, + "nauc_recall_at_5_diff1": 0.3748793011771698, + "nauc_recall_at_5_max": -0.3504567142206052, + "nauc_recall_at_5_std": -0.17845054202949487, + "ndcg_at_1": 0.27143, + "ndcg_at_10": 0.22624, + "ndcg_at_100": 0.29318, + "ndcg_at_1000": 0.52204, + "ndcg_at_20": 0.24135, + "ndcg_at_3": 0.24796, + "ndcg_at_5": 0.24666, + "precision_at_1": 0.27143, + "precision_at_10": 0.15714, + "precision_at_100": 0.07357, + "precision_at_1000": 0.03567, + "precision_at_20": 0.13357, + "precision_at_3": 0.20952, + "precision_at_5": 0.2, + "recall_at_1": 0.06428, + "recall_at_10": 0.14214, + "recall_at_100": 0.38739, + "recall_at_1000": 1.0, + "recall_at_20": 0.2058, + "recall_at_3": 0.08351, + "recall_at_5": 0.11676 } ] }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordMediumI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordMediumI2IRetrieval.json index 27241284b1..f1e28194b9 100644 --- 
a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordMediumI2IRetrieval.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/ROxfordMediumI2IRetrieval.json @@ -1,184 +1,184 @@ { - "dataset_revision": "3bd28e9c45e15f299117c634799f7035c4de2d31", - "evaluation_time": 17.523592710494995, + "dataset_revision": "1dfb86730ee4b3f49b441f4896d473c83eb5ff0d", + "evaluation_time": 26.38901996612549, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "cv_recall_at_1": 0.75714, - "cv_recall_at_10": 0.87143, - "cv_recall_at_100": 0.94286, + "cv_recall_at_1": 0.91429, + "cv_recall_at_10": 0.94286, + "cv_recall_at_100": 1.0, "cv_recall_at_1000": 1.0, - "cv_recall_at_20": 0.88571, - "cv_recall_at_3": 0.82857, - "cv_recall_at_5": 0.85714, + "cv_recall_at_20": 0.97143, + "cv_recall_at_3": 0.91429, + "cv_recall_at_5": 0.94286, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.75714, - "map_at_1": 0.04928, - "map_at_10": 0.14238, - "map_at_100": 0.25975, - "map_at_1000": 0.33, - "map_at_20": 0.1832, - "map_at_3": 0.08106, - "map_at_5": 0.10574, - "mrr_at_1": 0.7571428571428571, - "mrr_at_10": 0.7992857142857144, - "mrr_at_100": 0.8018753503269448, - "mrr_at_1000": 0.8022392070599913, - "mrr_at_20": 0.8001785714285715, - "mrr_at_3": 0.7904761904761906, - "mrr_at_5": 0.796904761904762, + "main_score": 0.91429, + "map_at_1": 0.08083, + "map_at_10": 0.24687, + "map_at_100": 0.42618, + "map_at_1000": 0.5327, + "map_at_20": 0.31214, + "map_at_3": 0.14225, + "map_at_5": 0.18461, + "mrr_at_1": 0.9142857142857143, + "mrr_at_10": 0.92, + "mrr_at_100": 0.9224459377246834, + "mrr_at_1000": 0.9224459377246834, + "mrr_at_20": 0.9215873015873017, + "mrr_at_3": 0.9142857142857143, + "mrr_at_5": 0.92, "nauc_cv_recall_at_1000_diff1": NaN, "nauc_cv_recall_at_1000_max": NaN, "nauc_cv_recall_at_1000_std": NaN, - "nauc_cv_recall_at_100_diff1": 0.15581232492997169, - 
"nauc_cv_recall_at_100_max": 0.2811624649859951, - "nauc_cv_recall_at_100_std": -0.12196545284780497, - "nauc_cv_recall_at_10_diff1": 0.394540014958862, - "nauc_cv_recall_at_10_max": 0.4610535313601873, - "nauc_cv_recall_at_10_std": -0.14937493321936154, - "nauc_cv_recall_at_1_diff1": 0.3099268419602683, - "nauc_cv_recall_at_1_max": 0.6277204689460957, - "nauc_cv_recall_at_1_std": 0.11781811503259924, - "nauc_cv_recall_at_20_diff1": 0.39261983863312777, - "nauc_cv_recall_at_20_max": 0.3849074513526345, - "nauc_cv_recall_at_20_std": -0.18147840531561454, - "nauc_cv_recall_at_3_diff1": 0.29657669519420665, - "nauc_cv_recall_at_3_max": 0.5705233706385782, - "nauc_cv_recall_at_3_std": 0.04756418696510771, - "nauc_cv_recall_at_5_diff1": 0.37162293488824066, - "nauc_cv_recall_at_5_max": 0.5234207968901848, - "nauc_cv_recall_at_5_std": -0.06068999028182738, - "nauc_map_at_1000_diff1": -0.14810539444535628, - "nauc_map_at_1000_max": 0.42639970063449095, - "nauc_map_at_1000_std": 0.35965851851847036, - "nauc_map_at_100_diff1": -0.04654719636323819, - "nauc_map_at_100_max": 0.20124568398625026, - "nauc_map_at_100_std": 0.09779920001336984, - "nauc_map_at_10_diff1": 0.19553982602379108, - "nauc_map_at_10_max": -0.16962211647877953, - "nauc_map_at_10_std": -0.2049718676191143, - "nauc_map_at_1_diff1": 0.19831336261662813, - "nauc_map_at_1_max": -0.26048526225990315, - "nauc_map_at_1_std": -0.24848660044643464, - "nauc_map_at_20_diff1": 0.11533311201293248, - "nauc_map_at_20_max": -0.06940468788280867, - "nauc_map_at_20_std": -0.15394999023265438, - "nauc_map_at_3_diff1": 0.22661360439380449, - "nauc_map_at_3_max": -0.24582246289343465, - "nauc_map_at_3_std": -0.24358060859761363, - "nauc_map_at_5_diff1": 0.2391073300885911, - "nauc_map_at_5_max": -0.21313902222536138, - "nauc_map_at_5_std": -0.22555952910665222, - "nauc_mrr_at_1000_diff1": 0.3118072480358942, - "nauc_mrr_at_1000_max": 0.5896434235555521, - "nauc_mrr_at_1000_std": 0.07565750084460038, - "nauc_mrr_at_100_diff1": 
0.3114581767470047, - "nauc_mrr_at_100_max": 0.5888175740478946, - "nauc_mrr_at_100_std": 0.07590573329917173, - "nauc_mrr_at_10_diff1": 0.31501956699434985, - "nauc_mrr_at_10_max": 0.5906694492065899, - "nauc_mrr_at_10_std": 0.07282707502971797, - "nauc_mrr_at_1_diff1": 0.3099268419602683, - "nauc_mrr_at_1_max": 0.6277204689460957, - "nauc_mrr_at_1_std": 0.11781811503259924, - "nauc_mrr_at_20_diff1": 0.31460997067981183, - "nauc_mrr_at_20_max": 0.5883036453949262, - "nauc_mrr_at_20_std": 0.07253787758405468, - "nauc_mrr_at_3_diff1": 0.298829153668517, - "nauc_mrr_at_3_max": 0.6036890630335515, - "nauc_mrr_at_3_std": 0.09929911274966623, - "nauc_mrr_at_5_diff1": 0.31309074738187426, - "nauc_mrr_at_5_max": 0.5968819745150035, - "nauc_mrr_at_5_std": 0.08124058142418439, - "nauc_ndcg_at_1000_diff1": 0.023759962305708315, - "nauc_ndcg_at_1000_max": 0.42118350886771483, - "nauc_ndcg_at_1000_std": 0.21503403862781068, - "nauc_ndcg_at_100_diff1": -0.10602196843045252, - "nauc_ndcg_at_100_max": 0.4040222401408388, - "nauc_ndcg_at_100_std": 0.3422251986340049, - "nauc_ndcg_at_10_diff1": -0.14230825595892613, - "nauc_ndcg_at_10_max": 0.5717619317460108, - "nauc_ndcg_at_10_std": 0.35188240284777716, - "nauc_ndcg_at_1_diff1": 0.3099268419602683, - "nauc_ndcg_at_1_max": 0.6277204689460957, - "nauc_ndcg_at_1_std": 0.11781811503259924, - "nauc_ndcg_at_20_diff1": -0.15743789243142528, - "nauc_ndcg_at_20_max": 0.5724054251457065, - "nauc_ndcg_at_20_std": 0.3797961049903619, - "nauc_ndcg_at_3_diff1": 0.02676887075350004, - "nauc_ndcg_at_3_max": 0.5535721465155271, - "nauc_ndcg_at_3_std": 0.22786549855388236, - "nauc_ndcg_at_5_diff1": -0.04449400646171614, - "nauc_ndcg_at_5_max": 0.555861797739005, - "nauc_ndcg_at_5_std": 0.27833851228984907, - "nauc_precision_at_1000_diff1": -0.19203752528557633, - "nauc_precision_at_1000_max": 0.4830680560318716, - "nauc_precision_at_1000_std": 0.5059538376358517, - "nauc_precision_at_100_diff1": -0.29506043721080727, - "nauc_precision_at_100_max": 
0.658086670179558, - "nauc_precision_at_100_std": 0.6166722456503947, - "nauc_precision_at_10_diff1": -0.2282600490658613, - "nauc_precision_at_10_max": 0.5965276467257976, - "nauc_precision_at_10_std": 0.42049443291187055, - "nauc_precision_at_1_diff1": 0.3099268419602683, - "nauc_precision_at_1_max": 0.6277204689460957, - "nauc_precision_at_1_std": 0.11781811503259924, - "nauc_precision_at_20_diff1": -0.2777369235938004, - "nauc_precision_at_20_max": 0.6422763272584597, - "nauc_precision_at_20_std": 0.46973331485105685, - "nauc_precision_at_3_diff1": -0.03905783426100687, - "nauc_precision_at_3_max": 0.548195217505554, - "nauc_precision_at_3_std": 0.2602422302381689, - "nauc_precision_at_5_diff1": -0.12812907648800725, - "nauc_precision_at_5_max": 0.5661902630852285, - "nauc_precision_at_5_std": 0.33254144330289526, - "nauc_recall_at_1000_diff1": 0.11552874226597364, - "nauc_recall_at_1000_max": 0.1991754113531379, - "nauc_recall_at_1000_std": 0.05667927260800616, - "nauc_recall_at_100_diff1": -0.01602892704072587, - "nauc_recall_at_100_max": -0.12257633928876718, - "nauc_recall_at_100_std": -0.01682143204953458, - "nauc_recall_at_10_diff1": 0.22298978148074566, - "nauc_recall_at_10_max": -0.22859971193805304, - "nauc_recall_at_10_std": -0.23715467924390157, - "nauc_recall_at_1_diff1": 0.19831336261662813, - "nauc_recall_at_1_max": -0.26048526225990315, - "nauc_recall_at_1_std": -0.24848660044643464, - "nauc_recall_at_20_diff1": 0.16037912600194174, - "nauc_recall_at_20_max": -0.1835918931237162, - "nauc_recall_at_20_std": -0.20833669696201074, - "nauc_recall_at_3_diff1": 0.2179234117693627, - "nauc_recall_at_3_max": -0.2525912771978034, - "nauc_recall_at_3_std": -0.24319572308223014, - "nauc_recall_at_5_diff1": 0.2530123469958082, - "nauc_recall_at_5_max": -0.22138719273185706, - "nauc_recall_at_5_std": -0.2215811361906131, - "ndcg_at_1": 0.75714, - "ndcg_at_10": 0.58385, - "ndcg_at_100": 0.48629, - "ndcg_at_1000": 0.59891, - "ndcg_at_20": 0.52663, - 
"ndcg_at_3": 0.6553, - "ndcg_at_5": 0.63094, - "precision_at_1": 0.75714, - "precision_at_10": 0.51286, - "precision_at_100": 0.227, - "precision_at_1000": 0.05964, - "precision_at_20": 0.42286, - "precision_at_3": 0.61429, - "precision_at_5": 0.57714, - "recall_at_1": 0.04928, - "recall_at_10": 0.17721, - "recall_at_100": 0.44393, - "recall_at_1000": 0.76857, - "recall_at_20": 0.24576, - "recall_at_3": 0.08507, - "recall_at_5": 0.11871 + "nauc_cv_recall_at_100_diff1": NaN, + "nauc_cv_recall_at_100_max": NaN, + "nauc_cv_recall_at_100_std": NaN, + "nauc_cv_recall_at_10_diff1": -0.4013772175536871, + "nauc_cv_recall_at_10_max": 0.42938842203548166, + "nauc_cv_recall_at_10_std": 0.6586134453781514, + "nauc_cv_recall_at_1_diff1": -0.11554621848739352, + "nauc_cv_recall_at_1_max": 0.5269996887643944, + "nauc_cv_recall_at_1_std": 0.21856520385932235, + "nauc_cv_recall_at_20_diff1": 0.3489729225023323, + "nauc_cv_recall_at_20_max": 0.9346405228758171, + "nauc_cv_recall_at_20_std": 0.5401493930905691, + "nauc_cv_recall_at_3_diff1": -0.11554621848739352, + "nauc_cv_recall_at_3_max": 0.5269996887643944, + "nauc_cv_recall_at_3_std": 0.21856520385932235, + "nauc_cv_recall_at_5_diff1": -0.4013772175536871, + "nauc_cv_recall_at_5_max": 0.42938842203548166, + "nauc_cv_recall_at_5_std": 0.6586134453781514, + "nauc_map_at_1000_diff1": -0.23125835300876216, + "nauc_map_at_1000_max": 0.23519514137370254, + "nauc_map_at_1000_std": 0.2182230501859235, + "nauc_map_at_100_diff1": -0.05015810647551921, + "nauc_map_at_100_max": -0.04327639525818628, + "nauc_map_at_100_std": -0.15828452131450388, + "nauc_map_at_10_diff1": 0.1893778857001354, + "nauc_map_at_10_max": -0.34441772024823814, + "nauc_map_at_10_std": -0.3848968165397017, + "nauc_map_at_1_diff1": 0.1529533401602949, + "nauc_map_at_1_max": -0.4190116365256113, + "nauc_map_at_1_std": -0.3330481576960033, + "nauc_map_at_20_diff1": 0.09589668436288233, + "nauc_map_at_20_max": -0.2673998395876724, + "nauc_map_at_20_std": 
-0.3314998258254241, + "nauc_map_at_3_diff1": 0.22783602569587666, + "nauc_map_at_3_max": -0.375611479696183, + "nauc_map_at_3_std": -0.3593266287200665, + "nauc_map_at_5_diff1": 0.23171354107352043, + "nauc_map_at_5_max": -0.3834575082863232, + "nauc_map_at_5_std": -0.38241308439234356, + "nauc_mrr_at_1000_diff1": -0.14268770122670912, + "nauc_mrr_at_1000_max": 0.5205774182168708, + "nauc_mrr_at_1000_std": 0.2680405904560385, + "nauc_mrr_at_100_diff1": -0.14268770122670912, + "nauc_mrr_at_100_max": 0.5205774182168708, + "nauc_mrr_at_100_std": 0.2680405904560385, + "nauc_mrr_at_10_diff1": -0.15637921835400717, + "nauc_mrr_at_10_max": 0.5130552220888355, + "nauc_mrr_at_10_std": 0.2814292383620098, + "nauc_mrr_at_1_diff1": -0.11554621848739352, + "nauc_mrr_at_1_max": 0.5269996887643944, + "nauc_mrr_at_1_std": 0.21856520385932235, + "nauc_mrr_at_20_diff1": -0.13623047059579352, + "nauc_mrr_at_20_max": 0.524976657329598, + "nauc_mrr_at_20_std": 0.2713958727890603, + "nauc_mrr_at_3_diff1": -0.11554621848739352, + "nauc_mrr_at_3_max": 0.5269996887643944, + "nauc_mrr_at_3_std": 0.21856520385932235, + "nauc_mrr_at_5_diff1": -0.15637921835400717, + "nauc_mrr_at_5_max": 0.5130552220888355, + "nauc_mrr_at_5_std": 0.2814292383620098, + "nauc_ndcg_at_1000_diff1": -0.24370615505516585, + "nauc_ndcg_at_1000_max": 0.3585208209406902, + "nauc_ndcg_at_1000_std": 0.263969492188972, + "nauc_ndcg_at_100_diff1": -0.14186676166246207, + "nauc_ndcg_at_100_max": 0.2275659208451184, + "nauc_ndcg_at_100_std": 0.16048415696096283, + "nauc_ndcg_at_10_diff1": -0.30589363585428303, + "nauc_ndcg_at_10_max": 0.500142114454778, + "nauc_ndcg_at_10_std": 0.4141408373889108, + "nauc_ndcg_at_1_diff1": -0.11554621848739352, + "nauc_ndcg_at_1_max": 0.5269996887643944, + "nauc_ndcg_at_1_std": 0.21856520385932235, + "nauc_ndcg_at_20_diff1": -0.2982156974982523, + "nauc_ndcg_at_20_max": 0.487494996769566, + "nauc_ndcg_at_20_std": 0.4663494007016063, + "nauc_ndcg_at_3_diff1": -0.13605929183456947, + 
"nauc_ndcg_at_3_max": 0.5394295944368543, + "nauc_ndcg_at_3_std": 0.2841764519265793, + "nauc_ndcg_at_5_diff1": -0.18517240726502945, + "nauc_ndcg_at_5_max": 0.5018241088290002, + "nauc_ndcg_at_5_std": 0.32986920973664985, + "nauc_precision_at_1000_diff1": -0.2610146663608417, + "nauc_precision_at_1000_max": 0.42090687761865053, + "nauc_precision_at_1000_std": 0.5453516915872082, + "nauc_precision_at_100_diff1": -0.31550298388759973, + "nauc_precision_at_100_max": 0.5382316505653624, + "nauc_precision_at_100_std": 0.5864596701910575, + "nauc_precision_at_10_diff1": -0.3396134608542791, + "nauc_precision_at_10_max": 0.5841486511939392, + "nauc_precision_at_10_std": 0.4649483079756401, + "nauc_precision_at_1_diff1": -0.11554621848739352, + "nauc_precision_at_1_max": 0.5269996887643944, + "nauc_precision_at_1_std": 0.21856520385932235, + "nauc_precision_at_20_diff1": -0.3739204789385173, + "nauc_precision_at_20_max": 0.5974041684159593, + "nauc_precision_at_20_std": 0.5452135797286048, + "nauc_precision_at_3_diff1": -0.16477444647626113, + "nauc_precision_at_3_max": 0.5728888479602177, + "nauc_precision_at_3_std": 0.3239051214923765, + "nauc_precision_at_5_diff1": -0.21896064507078017, + "nauc_precision_at_5_max": 0.5663742826794222, + "nauc_precision_at_5_std": 0.378912278123309, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": 0.18668259021675102, + "nauc_recall_at_100_max": -0.13326671980523305, + "nauc_recall_at_100_std": -0.28507500423807525, + "nauc_recall_at_10_diff1": 0.17850980669329247, + "nauc_recall_at_10_max": -0.40096190909138624, + "nauc_recall_at_10_std": -0.3570134850979618, + "nauc_recall_at_1_diff1": 0.1529533401602949, + "nauc_recall_at_1_max": -0.4190116365256113, + "nauc_recall_at_1_std": -0.3330481576960033, + "nauc_recall_at_20_diff1": 0.1509705328425781, + "nauc_recall_at_20_max": -0.3011359312987517, + "nauc_recall_at_20_std": -0.2929059943600196, + 
"nauc_recall_at_3_diff1": 0.22113145253276656, + "nauc_recall_at_3_max": -0.3819326194584587, + "nauc_recall_at_3_std": -0.3572099966902034, + "nauc_recall_at_5_diff1": 0.1981700707835856, + "nauc_recall_at_5_max": -0.42018635579579555, + "nauc_recall_at_5_std": -0.3263923639383356, + "ndcg_at_1": 0.91429, + "ndcg_at_10": 0.77153, + "ndcg_at_100": 0.65508, + "ndcg_at_1000": 0.81056, + "ndcg_at_20": 0.70656, + "ndcg_at_3": 0.85518, + "ndcg_at_5": 0.83659, + "precision_at_1": 0.91429, + "precision_at_10": 0.66143, + "precision_at_100": 0.29157, + "precision_at_1000": 0.07894, + "precision_at_20": 0.55, + "precision_at_3": 0.81429, + "precision_at_5": 0.76571, + "recall_at_1": 0.08083, + "recall_at_10": 0.26885, + "recall_at_100": 0.57158, + "recall_at_1000": 1.0, + "recall_at_20": 0.35986, + "recall_at_3": 0.14512, + "recall_at_5": 0.20079 } ] }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisEasyI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisEasyI2IRetrieval.json index bd4a6c53b9..1c5827ef01 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisEasyI2IRetrieval.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisEasyI2IRetrieval.json @@ -1,12 +1,12 @@ { - "dataset_revision": "a7293da8a341de665ee4dcb2f209281df342d80b", - "evaluation_time": 22.57354736328125, + "dataset_revision": "7d821ddebcb30ad343133e3a81e23347ac2a08a8", + "evaluation_time": 44.682684659957886, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "cv_recall_at_1": 0.94286, + "cv_recall_at_1": 0.97143, "cv_recall_at_10": 1.0, "cv_recall_at_100": 1.0, "cv_recall_at_1000": 1.0, @@ -17,21 +17,21 @@ "languages": [ "eng-Latn" ], - "main_score": 0.94286, - "map_at_1": 0.02765, - "map_at_10": 0.11901, - "map_at_100": 0.46776, - "map_at_1000": 0.58942, - "map_at_20": 
0.19679, - "map_at_3": 0.05048, - "map_at_5": 0.07407, - "mrr_at_1": 0.9428571428571428, - "mrr_at_10": 0.9666666666666666, - "mrr_at_100": 0.9666666666666666, - "mrr_at_1000": 0.9666666666666666, - "mrr_at_20": 0.9666666666666666, - "mrr_at_3": 0.9666666666666666, - "mrr_at_5": 0.9666666666666666, + "main_score": 0.97143, + "map_at_1": 0.02925, + "map_at_10": 0.14539, + "map_at_100": 0.63054, + "map_at_1000": 0.75513, + "map_at_20": 0.24788, + "map_at_3": 0.06378, + "map_at_5": 0.08742, + "mrr_at_1": 0.9714285714285714, + "mrr_at_10": 0.9857142857142858, + "mrr_at_100": 0.9857142857142858, + "mrr_at_1000": 0.9857142857142858, + "mrr_at_20": 0.9857142857142858, + "mrr_at_3": 0.9857142857142858, + "mrr_at_5": 0.9857142857142858, "nauc_cv_recall_at_1000_diff1": NaN, "nauc_cv_recall_at_1000_max": NaN, "nauc_cv_recall_at_1000_std": NaN, @@ -41,9 +41,9 @@ "nauc_cv_recall_at_10_diff1": NaN, "nauc_cv_recall_at_10_max": NaN, "nauc_cv_recall_at_10_std": NaN, - "nauc_cv_recall_at_1_diff1": -0.303338001867413, - "nauc_cv_recall_at_1_max": -0.11951447245564903, - "nauc_cv_recall_at_1_std": 0.38328664799253065, + "nauc_cv_recall_at_1_diff1": -0.6615312791783367, + "nauc_cv_recall_at_1_max": 0.2754435107376266, + "nauc_cv_recall_at_1_std": 0.7117180205415492, "nauc_cv_recall_at_20_diff1": NaN, "nauc_cv_recall_at_20_max": NaN, "nauc_cv_recall_at_20_std": NaN, @@ -53,132 +53,132 @@ "nauc_cv_recall_at_5_diff1": NaN, "nauc_cv_recall_at_5_max": NaN, "nauc_cv_recall_at_5_std": NaN, - "nauc_map_at_1000_diff1": -0.2774518254418231, - "nauc_map_at_1000_max": 0.25032079891265085, - "nauc_map_at_1000_std": 0.45081359384807485, - "nauc_map_at_100_diff1": -0.04907209740285267, - "nauc_map_at_100_max": 0.18686662832088577, - "nauc_map_at_100_std": 0.31839565681325593, - "nauc_map_at_10_diff1": 0.4545859442374117, - "nauc_map_at_10_max": -0.04495045809094941, - "nauc_map_at_10_std": 0.002947268581625076, - "nauc_map_at_1_diff1": 0.7049183369958152, - "nauc_map_at_1_max": -0.2616079702049637, - 
"nauc_map_at_1_std": -0.3618487407893112, - "nauc_map_at_20_diff1": 0.30890872824690435, - "nauc_map_at_20_max": 0.07581745603833642, - "nauc_map_at_20_std": 0.15307053171724933, - "nauc_map_at_3_diff1": 0.5935344781456238, - "nauc_map_at_3_max": -0.20146382202873225, - "nauc_map_at_3_std": -0.22821424999017603, - "nauc_map_at_5_diff1": 0.5703897004682528, - "nauc_map_at_5_max": -0.16308005302080678, - "nauc_map_at_5_std": -0.1620575175690372, - "nauc_mrr_at_1000_diff1": -0.39652527677737137, - "nauc_mrr_at_1000_max": -0.10590903027877008, - "nauc_mrr_at_1000_std": 0.452714419100977, - "nauc_mrr_at_100_diff1": -0.39652527677737137, - "nauc_mrr_at_100_max": -0.10590903027877008, - "nauc_mrr_at_100_std": 0.452714419100977, - "nauc_mrr_at_10_diff1": -0.39652527677737137, - "nauc_mrr_at_10_max": -0.10590903027877008, - "nauc_mrr_at_10_std": 0.452714419100977, - "nauc_mrr_at_1_diff1": -0.303338001867413, - "nauc_mrr_at_1_max": -0.11951447245564903, - "nauc_mrr_at_1_std": 0.38328664799253065, - "nauc_mrr_at_20_diff1": -0.39652527677737137, - "nauc_mrr_at_20_max": -0.10590903027877008, - "nauc_mrr_at_20_std": 0.452714419100977, - "nauc_mrr_at_3_diff1": -0.39652527677737137, - "nauc_mrr_at_3_max": -0.10590903027877008, - "nauc_mrr_at_3_std": 0.452714419100977, - "nauc_mrr_at_5_diff1": -0.39652527677737137, - "nauc_mrr_at_5_max": -0.10590903027877008, - "nauc_mrr_at_5_std": 0.452714419100977, - "nauc_ndcg_at_1000_diff1": -0.49032575238867226, - "nauc_ndcg_at_1000_max": 0.30827149571860624, - "nauc_ndcg_at_1000_std": 0.5422742306681069, - "nauc_ndcg_at_100_diff1": -0.2712801258824707, - "nauc_ndcg_at_100_max": 0.2469510949945573, - "nauc_ndcg_at_100_std": 0.4367311454283413, - "nauc_ndcg_at_10_diff1": -0.519586478031318, - "nauc_ndcg_at_10_max": 0.35401668386388874, - "nauc_ndcg_at_10_std": 0.5596598049318611, - "nauc_ndcg_at_1_diff1": -0.303338001867413, - "nauc_ndcg_at_1_max": -0.11951447245564903, - "nauc_ndcg_at_1_std": 0.38328664799253065, - "nauc_ndcg_at_20_diff1": 
-0.4606517361381654, - "nauc_ndcg_at_20_max": 0.38378869676003413, - "nauc_ndcg_at_20_std": 0.474186229263476, - "nauc_ndcg_at_3_diff1": -0.585149663762276, - "nauc_ndcg_at_3_max": 0.217548589813707, - "nauc_ndcg_at_3_std": 0.550167698282911, - "nauc_ndcg_at_5_diff1": -0.5267224367233376, - "nauc_ndcg_at_5_max": 0.2591401820511711, - "nauc_ndcg_at_5_std": 0.5702945720424282, - "nauc_precision_at_1000_diff1": -0.4499561707406712, - "nauc_precision_at_1000_max": 0.15634621233710536, - "nauc_precision_at_1000_std": 0.17441880422894795, - "nauc_precision_at_100_diff1": -0.4968893518170479, - "nauc_precision_at_100_max": 0.19447257428460527, - "nauc_precision_at_100_std": 0.25819757590889797, - "nauc_precision_at_10_diff1": -0.6167580528722199, - "nauc_precision_at_10_max": 0.3956822166454678, - "nauc_precision_at_10_std": 0.587829591941972, - "nauc_precision_at_1_diff1": -0.303338001867413, - "nauc_precision_at_1_max": -0.11951447245564903, - "nauc_precision_at_1_std": 0.38328664799253065, - "nauc_precision_at_20_diff1": -0.6031642916224456, - "nauc_precision_at_20_max": 0.4138166765495704, - "nauc_precision_at_20_std": 0.5161953717797159, - "nauc_precision_at_3_diff1": -0.6298090956856782, - "nauc_precision_at_3_max": 0.24751396267538517, - "nauc_precision_at_3_std": 0.557806449101913, - "nauc_precision_at_5_diff1": -0.5860255772185323, - "nauc_precision_at_5_max": 0.29785990954926145, - "nauc_precision_at_5_std": 0.5944106636660691, - "nauc_recall_at_1000_diff1": -0.12209326635132679, - "nauc_recall_at_1000_max": 0.008543724924431836, - "nauc_recall_at_1000_std": 0.25794589209932395, - "nauc_recall_at_100_diff1": 0.16704714994186, - "nauc_recall_at_100_max": 0.053397233537995845, - "nauc_recall_at_100_std": 0.10488570154536475, - "nauc_recall_at_10_diff1": 0.5240194164262459, - "nauc_recall_at_10_max": -0.03784601294172749, - "nauc_recall_at_10_std": -0.07182820315172336, - "nauc_recall_at_1_diff1": 0.7049183369958152, - "nauc_recall_at_1_max": -0.2616079702049637, - 
"nauc_recall_at_1_std": -0.3618487407893112, - "nauc_recall_at_20_diff1": 0.40266587722079766, - "nauc_recall_at_20_max": 0.03757413630827814, - "nauc_recall_at_20_std": 0.042791117828726286, - "nauc_recall_at_3_diff1": 0.6179795805128587, - "nauc_recall_at_3_max": -0.1966728040865525, - "nauc_recall_at_3_std": -0.2470220135910208, - "nauc_recall_at_5_diff1": 0.6188095385694616, - "nauc_recall_at_5_max": -0.1447272403059871, - "nauc_recall_at_5_std": -0.20646683990606104, - "ndcg_at_1": 0.94286, - "ndcg_at_10": 0.85179, - "ndcg_at_100": 0.71775, - "ndcg_at_1000": 0.83657, - "ndcg_at_20": 0.82388, - "ndcg_at_3": 0.8936, - "ndcg_at_5": 0.88343, - "precision_at_1": 0.94286, - "precision_at_10": 0.81429, - "precision_at_100": 0.519, - "precision_at_1000": 0.09099, - "precision_at_20": 0.77143, - "precision_at_3": 0.87619, - "precision_at_5": 0.86286, - "recall_at_1": 0.02765, - "recall_at_10": 0.1307, - "recall_at_100": 0.60631, - "recall_at_1000": 0.93409, - "recall_at_20": 0.22272, - "recall_at_3": 0.05209, - "recall_at_5": 0.07985 + "nauc_map_at_1000_diff1": -0.40591219852099064, + "nauc_map_at_1000_max": 0.26094130889238604, + "nauc_map_at_1000_std": 0.09665505089407778, + "nauc_map_at_100_diff1": -0.12855307032951752, + "nauc_map_at_100_max": 0.15818953768862642, + "nauc_map_at_100_std": -0.2617923598733903, + "nauc_map_at_10_diff1": 0.4267948633570509, + "nauc_map_at_10_max": -0.08091999723228532, + "nauc_map_at_10_std": -0.48144180579841844, + "nauc_map_at_1_diff1": 0.7246477183930717, + "nauc_map_at_1_max": -0.2595633654811421, + "nauc_map_at_1_std": -0.473439121428067, + "nauc_map_at_20_diff1": 0.28638200450299545, + "nauc_map_at_20_max": 0.022722341991735208, + "nauc_map_at_20_std": -0.4223664868261228, + "nauc_map_at_3_diff1": 0.599669109747631, + "nauc_map_at_3_max": -0.18294743939161057, + "nauc_map_at_3_std": -0.500291623756148, + "nauc_map_at_5_diff1": 0.5435589386638962, + "nauc_map_at_5_max": -0.1414910554361904, + "nauc_map_at_5_std": 
-0.5064288331914394, + "nauc_mrr_at_1000_diff1": -0.6615312791783412, + "nauc_mrr_at_1000_max": 0.2754435107376266, + "nauc_mrr_at_1000_std": 0.7117180205415492, + "nauc_mrr_at_100_diff1": -0.6615312791783412, + "nauc_mrr_at_100_max": 0.2754435107376266, + "nauc_mrr_at_100_std": 0.7117180205415492, + "nauc_mrr_at_10_diff1": -0.6615312791783412, + "nauc_mrr_at_10_max": 0.2754435107376266, + "nauc_mrr_at_10_std": 0.7117180205415492, + "nauc_mrr_at_1_diff1": -0.6615312791783367, + "nauc_mrr_at_1_max": 0.2754435107376266, + "nauc_mrr_at_1_std": 0.7117180205415492, + "nauc_mrr_at_20_diff1": -0.6615312791783412, + "nauc_mrr_at_20_max": 0.2754435107376266, + "nauc_mrr_at_20_std": 0.7117180205415492, + "nauc_mrr_at_3_diff1": -0.6615312791783412, + "nauc_mrr_at_3_max": 0.2754435107376266, + "nauc_mrr_at_3_std": 0.7117180205415492, + "nauc_mrr_at_5_diff1": -0.6615312791783412, + "nauc_mrr_at_5_max": 0.2754435107376266, + "nauc_mrr_at_5_std": 0.7117180205415492, + "nauc_ndcg_at_1000_diff1": -0.6596495665503245, + "nauc_ndcg_at_1000_max": 0.35984465648987496, + "nauc_ndcg_at_1000_std": 0.31222271915833927, + "nauc_ndcg_at_100_diff1": -0.42172915492454077, + "nauc_ndcg_at_100_max": 0.1660609412830679, + "nauc_ndcg_at_100_std": 0.03890643898659772, + "nauc_ndcg_at_10_diff1": -0.795470804963755, + "nauc_ndcg_at_10_max": 0.62218300778039, + "nauc_ndcg_at_10_std": 0.41853096184140837, + "nauc_ndcg_at_1_diff1": -0.6615312791783367, + "nauc_ndcg_at_1_max": 0.2754435107376266, + "nauc_ndcg_at_1_std": 0.7117180205415492, + "nauc_ndcg_at_20_diff1": -0.8166236637349379, + "nauc_ndcg_at_20_max": 0.5512278088652715, + "nauc_ndcg_at_20_std": 0.4393916095824728, + "nauc_ndcg_at_3_diff1": -0.9341977827431088, + "nauc_ndcg_at_3_max": 0.6672676444157623, + "nauc_ndcg_at_3_std": 0.39642947946164325, + "nauc_ndcg_at_5_diff1": -0.9441446617385689, + "nauc_ndcg_at_5_max": 0.680574940437725, + "nauc_ndcg_at_5_std": 0.45573053150824994, + "nauc_precision_at_1000_diff1": -0.4449935554869243, + 
"nauc_precision_at_1000_max": 0.16030281251691458, + "nauc_precision_at_1000_std": 0.5733632485324373, + "nauc_precision_at_100_diff1": -0.5302407180791104, + "nauc_precision_at_100_max": 0.1551769884391311, + "nauc_precision_at_100_std": 0.4184977473765995, + "nauc_precision_at_10_diff1": -0.9456434208407176, + "nauc_precision_at_10_max": 0.5868765770118642, + "nauc_precision_at_10_std": 0.50484511730284, + "nauc_precision_at_1_diff1": -0.6615312791783367, + "nauc_precision_at_1_max": 0.2754435107376266, + "nauc_precision_at_1_std": 0.7117180205415492, + "nauc_precision_at_20_diff1": -0.926778796672683, + "nauc_precision_at_20_max": 0.5280684464415885, + "nauc_precision_at_20_std": 0.5682403269442342, + "nauc_precision_at_3_diff1": -1.0055244319950187, + "nauc_precision_at_3_max": 0.7123404917522567, + "nauc_precision_at_3_std": 0.37741207594148934, + "nauc_precision_at_5_diff1": -1.0297986387840703, + "nauc_precision_at_5_max": 0.6765172620516983, + "nauc_precision_at_5_std": 0.4879394858412396, + "nauc_recall_at_1000_diff1": -0.033581200345988084, + "nauc_recall_at_1000_max": -0.25140608933946496, + "nauc_recall_at_1000_std": 0.10256335172342733, + "nauc_recall_at_100_diff1": 0.18293184373343338, + "nauc_recall_at_100_max": -0.0562892444564702, + "nauc_recall_at_100_std": -0.4925077606843601, + "nauc_recall_at_10_diff1": 0.48352650007752274, + "nauc_recall_at_10_max": -0.13513020550162758, + "nauc_recall_at_10_std": -0.48966303958842, + "nauc_recall_at_1_diff1": 0.7246477183930717, + "nauc_recall_at_1_max": -0.2595633654811421, + "nauc_recall_at_1_std": -0.473439121428067, + "nauc_recall_at_20_diff1": 0.34593183103928304, + "nauc_recall_at_20_max": -0.054714009747642164, + "nauc_recall_at_20_std": -0.45184646419179664, + "nauc_recall_at_3_diff1": 0.6064436631162091, + "nauc_recall_at_3_max": -0.18329351534591007, + "nauc_recall_at_3_std": -0.5063835059924418, + "nauc_recall_at_5_diff1": 0.5499812724878281, + "nauc_recall_at_5_max": -0.14351977259114093, + 
"nauc_recall_at_5_std": -0.510558046605686, + "ndcg_at_1": 0.97143, + "ndcg_at_10": 0.92622, + "ndcg_at_100": 0.84274, + "ndcg_at_1000": 0.92213, + "ndcg_at_20": 0.91612, + "ndcg_at_3": 0.95074, + "ndcg_at_5": 0.939, + "precision_at_1": 0.97143, + "precision_at_10": 0.89571, + "precision_at_100": 0.62029, + "precision_at_1000": 0.09697, + "precision_at_20": 0.86929, + "precision_at_3": 0.94286, + "precision_at_5": 0.92286, + "recall_at_1": 0.02925, + "recall_at_10": 0.15836, + "recall_at_100": 0.7168, + "recall_at_1000": 0.98961, + "recall_at_20": 0.26733, + "recall_at_3": 0.06435, + "recall_at_5": 0.08842 } ] }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisHardI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisHardI2IRetrieval.json index 5b15d0ae51..2fd705aca4 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisHardI2IRetrieval.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisHardI2IRetrieval.json @@ -1,184 +1,184 @@ { - "dataset_revision": "fd121b6592fe946616fa85116703b94a4c61fd63", - "evaluation_time": 22.44923186302185, + "dataset_revision": "d3e0adf4e942446c04427511ccce281c86861248", + "evaluation_time": 60.67172384262085, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "cv_recall_at_1": 0.04286, - "cv_recall_at_10": 0.41429, - "cv_recall_at_100": 0.97143, + "cv_recall_at_1": 0.41429, + "cv_recall_at_10": 0.68571, + "cv_recall_at_100": 1.0, "cv_recall_at_1000": 1.0, - "cv_recall_at_20": 0.6, - "cv_recall_at_3": 0.17143, - "cv_recall_at_5": 0.27143, + "cv_recall_at_20": 0.77143, + "cv_recall_at_3": 0.51429, + "cv_recall_at_5": 0.6, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.04286, - "map_at_1": 0.00032, - "map_at_10": 0.00319, - "map_at_100": 0.04427, - "map_at_1000": 0.14044, - 
"map_at_20": 0.00602, - "map_at_3": 0.00096, - "map_at_5": 0.00146, - "mrr_at_1": 0.04285714285714286, - "mrr_at_10": 0.13515873015873012, - "mrr_at_100": 0.15938730320565236, - "mrr_at_1000": 0.15962520658618265, - "mrr_at_20": 0.14815883634769078, - "mrr_at_3": 0.0976190476190476, - "mrr_at_5": 0.11833333333333333, + "main_score": 0.41429, + "map_at_1": 0.00597, + "map_at_10": 0.05664, + "map_at_100": 0.23596, + "map_at_1000": 0.3628, + "map_at_20": 0.10243, + "map_at_3": 0.01731, + "map_at_5": 0.02872, + "mrr_at_1": 0.4142857142857143, + "mrr_at_10": 0.49003968253968255, + "mrr_at_100": 0.502828543277437, + "mrr_at_1000": 0.502828543277437, + "mrr_at_20": 0.49544343814080655, + "mrr_at_3": 0.4619047619047618, + "mrr_at_5": 0.4804761904761904, "nauc_cv_recall_at_1000_diff1": NaN, "nauc_cv_recall_at_1000_max": NaN, "nauc_cv_recall_at_1000_std": NaN, - "nauc_cv_recall_at_100_diff1": -0.6909430438842217, - "nauc_cv_recall_at_100_max": 0.6790382819794577, - "nauc_cv_recall_at_100_std": 0.4143323996265152, - "nauc_cv_recall_at_10_diff1": 0.2550034187718293, - "nauc_cv_recall_at_10_max": -0.26788387263688956, - "nauc_cv_recall_at_10_std": -0.3843993125496647, - "nauc_cv_recall_at_1_diff1": 0.3989804132009661, - "nauc_cv_recall_at_1_max": -0.12905822377247125, - "nauc_cv_recall_at_1_std": -0.401663536356319, - "nauc_cv_recall_at_20_diff1": 0.12233388457787381, - "nauc_cv_recall_at_20_max": -0.21169910528770744, - "nauc_cv_recall_at_20_std": -0.2534876818046898, - "nauc_cv_recall_at_3_diff1": 0.35100458047053923, - "nauc_cv_recall_at_3_max": -0.1396262752446388, - "nauc_cv_recall_at_3_std": -0.314985425775557, - "nauc_cv_recall_at_5_diff1": 0.35853258558905104, - "nauc_cv_recall_at_5_max": -0.1935699390601907, - "nauc_cv_recall_at_5_std": -0.4389893304719292, - "nauc_map_at_1000_diff1": -0.048721647577313475, - "nauc_map_at_1000_max": 0.02185883585100512, - "nauc_map_at_1000_std": -0.014590760062175558, - "nauc_map_at_100_diff1": 0.20175041133547802, - 
"nauc_map_at_100_max": -0.2999406736946551, - "nauc_map_at_100_std": -0.21513200915624464, - "nauc_map_at_10_diff1": 0.4340980160657004, - "nauc_map_at_10_max": -0.30137691467001876, - "nauc_map_at_10_std": -0.4166728188377905, - "nauc_map_at_1_diff1": 0.3225400054210336, - "nauc_map_at_1_max": -0.20455804791357643, - "nauc_map_at_1_std": -0.3502703918125217, - "nauc_map_at_20_diff1": 0.4482686549814925, - "nauc_map_at_20_max": -0.33767777626178874, - "nauc_map_at_20_std": -0.39261152175985364, - "nauc_map_at_3_diff1": 0.3906475942798504, - "nauc_map_at_3_max": -0.1421273626640729, - "nauc_map_at_3_std": -0.3164227766935927, - "nauc_map_at_5_diff1": 0.40607500232859556, - "nauc_map_at_5_max": -0.21636348183658996, - "nauc_map_at_5_std": -0.385395422874435, - "nauc_mrr_at_1000_diff1": 0.33380716561285934, - "nauc_mrr_at_1000_max": -0.19720559281432273, - "nauc_mrr_at_1000_std": -0.3739725633585593, - "nauc_mrr_at_100_diff1": 0.3320423802626735, - "nauc_mrr_at_100_max": -0.19584551091041755, - "nauc_mrr_at_100_std": -0.37267220068248286, - "nauc_mrr_at_10_diff1": 0.33810313585026747, - "nauc_mrr_at_10_max": -0.20898927155954197, - "nauc_mrr_at_10_std": -0.39317334419879707, - "nauc_mrr_at_1_diff1": 0.3989804132009661, - "nauc_mrr_at_1_max": -0.12905822377247125, - "nauc_mrr_at_1_std": -0.401663536356319, - "nauc_mrr_at_20_diff1": 0.3270365458084192, - "nauc_mrr_at_20_max": -0.206603016304459, - "nauc_mrr_at_20_std": -0.3731460382346558, - "nauc_mrr_at_3_diff1": 0.3569994314378864, - "nauc_mrr_at_3_max": -0.17174174325462235, - "nauc_mrr_at_3_std": -0.3561501867618589, - "nauc_mrr_at_5_diff1": 0.3571575774665211, - "nauc_mrr_at_5_max": -0.18834228874209263, - "nauc_mrr_at_5_std": -0.40200287994954503, - "nauc_ndcg_at_1000_diff1": -0.0743652757576749, - "nauc_ndcg_at_1000_max": 0.09391843454021216, - "nauc_ndcg_at_1000_std": 0.08425685683454673, - "nauc_ndcg_at_100_diff1": 0.023516077444591114, - "nauc_ndcg_at_100_max": -0.11892313686780007, - "nauc_ndcg_at_100_std": 
-0.16710001090586352, - "nauc_ndcg_at_10_diff1": 0.3882520721308202, - "nauc_ndcg_at_10_max": -0.2924404653976282, - "nauc_ndcg_at_10_std": -0.4189010025783995, - "nauc_ndcg_at_1_diff1": 0.3989804132009661, - "nauc_ndcg_at_1_max": -0.12905822377247125, - "nauc_ndcg_at_1_std": -0.401663536356319, - "nauc_ndcg_at_20_diff1": 0.3805388505094879, - "nauc_ndcg_at_20_max": -0.30519760291519316, - "nauc_ndcg_at_20_std": -0.3772566200501027, - "nauc_ndcg_at_3_diff1": 0.412201762045363, - "nauc_ndcg_at_3_max": -0.17116948069763432, - "nauc_ndcg_at_3_std": -0.3608861222611007, - "nauc_ndcg_at_5_diff1": 0.35696244084680845, - "nauc_ndcg_at_5_max": -0.25186160853169576, - "nauc_ndcg_at_5_std": -0.400509495428499, - "nauc_precision_at_1000_diff1": -0.22093427992384315, - "nauc_precision_at_1000_max": 0.2196784647696767, - "nauc_precision_at_1000_std": 0.04361074681539203, - "nauc_precision_at_100_diff1": -0.016149306619419775, - "nauc_precision_at_100_max": 0.034799312614065006, - "nauc_precision_at_100_std": -0.13882957890950748, - "nauc_precision_at_10_diff1": 0.38489780804914814, - "nauc_precision_at_10_max": -0.30500607571435273, - "nauc_precision_at_10_std": -0.41532331666205985, - "nauc_precision_at_1_diff1": 0.3989804132009661, - "nauc_precision_at_1_max": -0.12905822377247125, - "nauc_precision_at_1_std": -0.401663536356319, - "nauc_precision_at_20_diff1": 0.3692528574157947, - "nauc_precision_at_20_max": -0.30111599366142605, - "nauc_precision_at_20_std": -0.35015003540161194, - "nauc_precision_at_3_diff1": 0.428985570425912, - "nauc_precision_at_3_max": -0.1563930424168448, - "nauc_precision_at_3_std": -0.3449801647848643, - "nauc_precision_at_5_diff1": 0.34983601813446513, - "nauc_precision_at_5_max": -0.2741390952059421, - "nauc_precision_at_5_std": -0.4086645123951, - "nauc_recall_at_1000_diff1": -0.08562125010420926, - "nauc_recall_at_1000_max": 0.07813883934551541, - "nauc_recall_at_1000_std": 0.23218650354926193, - "nauc_recall_at_100_diff1": 
-0.01041195221018056, - "nauc_recall_at_100_max": -0.23261397534923414, - "nauc_recall_at_100_std": -0.05979942942574269, - "nauc_recall_at_10_diff1": 0.3361904724346995, - "nauc_recall_at_10_max": -0.28110372866797806, - "nauc_recall_at_10_std": -0.36784022778417585, - "nauc_recall_at_1_diff1": 0.3225400054210336, - "nauc_recall_at_1_max": -0.20455804791357643, - "nauc_recall_at_1_std": -0.3502703918125217, - "nauc_recall_at_20_diff1": 0.31944679632949374, - "nauc_recall_at_20_max": -0.32588194303425905, - "nauc_recall_at_20_std": -0.25546971593163326, - "nauc_recall_at_3_diff1": 0.36197730614062007, - "nauc_recall_at_3_max": -0.07376636861137077, - "nauc_recall_at_3_std": -0.2538555661579059, - "nauc_recall_at_5_diff1": 0.3711994561836934, - "nauc_recall_at_5_max": -0.1684006987187415, - "nauc_recall_at_5_std": -0.373838797717674, - "ndcg_at_1": 0.04286, - "ndcg_at_10": 0.08888, - "ndcg_at_100": 0.20767, - "ndcg_at_1000": 0.46304, - "ndcg_at_20": 0.0984, - "ndcg_at_3": 0.06137, - "ndcg_at_5": 0.07145, - "precision_at_1": 0.04286, - "precision_at_10": 0.10143, - "precision_at_100": 0.202, - "precision_at_1000": 0.09564, - "precision_at_20": 0.10857, - "precision_at_3": 0.06667, - "precision_at_5": 0.08, - "recall_at_1": 0.00032, - "recall_at_10": 0.0087, - "recall_at_100": 0.18965, - "recall_at_1000": 0.67973, - "recall_at_20": 0.01989, - "recall_at_3": 0.00187, - "recall_at_5": 0.00327 + "nauc_cv_recall_at_100_diff1": NaN, + "nauc_cv_recall_at_100_max": NaN, + "nauc_cv_recall_at_100_std": NaN, + "nauc_cv_recall_at_10_diff1": 0.13932174607931247, + "nauc_cv_recall_at_10_max": -0.8963390270702233, + "nauc_cv_recall_at_10_std": -0.8396014384177408, + "nauc_cv_recall_at_1_diff1": 0.2060503021455104, + "nauc_cv_recall_at_1_max": -0.5281725278583708, + "nauc_cv_recall_at_1_std": -0.7573041598137233, + "nauc_cv_recall_at_20_diff1": 0.0628172996594046, + "nauc_cv_recall_at_20_max": -0.7762997236681444, + "nauc_cv_recall_at_20_std": -0.7122935543988169, + 
"nauc_cv_recall_at_3_diff1": 0.35040951939422, + "nauc_cv_recall_at_3_max": -0.6603886570854577, + "nauc_cv_recall_at_3_std": -0.80932236130428, + "nauc_cv_recall_at_5_diff1": 0.345291099520841, + "nauc_cv_recall_at_5_max": -0.6727939617521097, + "nauc_cv_recall_at_5_std": -0.7926260441843702, + "nauc_map_at_1000_diff1": 0.2637485190424332, + "nauc_map_at_1000_max": -0.3214602514655989, + "nauc_map_at_1000_std": -0.3732986822642579, + "nauc_map_at_100_diff1": 0.31321538209822075, + "nauc_map_at_100_max": -0.4434862100043841, + "nauc_map_at_100_std": -0.5807650790699361, + "nauc_map_at_10_diff1": 0.3581935661941152, + "nauc_map_at_10_max": -0.4787594793701487, + "nauc_map_at_10_std": -0.5695584780209748, + "nauc_map_at_1_diff1": 0.35392215263704485, + "nauc_map_at_1_max": -0.47033776664547916, + "nauc_map_at_1_std": -0.581374357285355, + "nauc_map_at_20_diff1": 0.33452504163824237, + "nauc_map_at_20_max": -0.4689043535361316, + "nauc_map_at_20_std": -0.5815981719022251, + "nauc_map_at_3_diff1": 0.3418415128674503, + "nauc_map_at_3_max": -0.48587780235367684, + "nauc_map_at_3_std": -0.5797047003739161, + "nauc_map_at_5_diff1": 0.3476248106096462, + "nauc_map_at_5_max": -0.4810067114931804, + "nauc_map_at_5_std": -0.5755673287021957, + "nauc_mrr_at_1000_diff1": 0.25879474150167453, + "nauc_mrr_at_1000_max": -0.5954028468975692, + "nauc_mrr_at_1000_std": -0.7782793535054984, + "nauc_mrr_at_100_diff1": 0.25879474150167453, + "nauc_mrr_at_100_max": -0.5954028468975692, + "nauc_mrr_at_100_std": -0.7782793535054984, + "nauc_mrr_at_10_diff1": 0.25457966886240146, + "nauc_mrr_at_10_max": -0.6072896469946196, + "nauc_mrr_at_10_std": -0.7834200101118244, + "nauc_mrr_at_1_diff1": 0.2060503021455104, + "nauc_mrr_at_1_max": -0.5281725278583708, + "nauc_mrr_at_1_std": -0.7573041598137233, + "nauc_mrr_at_20_diff1": 0.25380616623495234, + "nauc_mrr_at_20_max": -0.5999473178413075, + "nauc_mrr_at_20_std": -0.778340949953845, + "nauc_mrr_at_3_diff1": 0.276790929933105, + 
"nauc_mrr_at_3_max": -0.5865299456675351, + "nauc_mrr_at_3_std": -0.7820043340347346, + "nauc_mrr_at_5_diff1": 0.2734971388552989, + "nauc_mrr_at_5_max": -0.5883165176933902, + "nauc_mrr_at_5_std": -0.7787421973872966, + "nauc_ndcg_at_1000_diff1": 0.29912959985776905, + "nauc_ndcg_at_1000_max": -0.28569354071871356, + "nauc_ndcg_at_1000_std": -0.3302978581904054, + "nauc_ndcg_at_100_diff1": 0.33339067606556777, + "nauc_ndcg_at_100_max": -0.3794361926736525, + "nauc_ndcg_at_100_std": -0.4883736139930127, + "nauc_ndcg_at_10_diff1": 0.26397022259588276, + "nauc_ndcg_at_10_max": -0.5817578270842201, + "nauc_ndcg_at_10_std": -0.7307563120549173, + "nauc_ndcg_at_1_diff1": 0.2060503021455104, + "nauc_ndcg_at_1_max": -0.5281725278583708, + "nauc_ndcg_at_1_std": -0.7573041598137233, + "nauc_ndcg_at_20_diff1": 0.2623409031073529, + "nauc_ndcg_at_20_max": -0.5359230413673359, + "nauc_ndcg_at_20_std": -0.6975403451344717, + "nauc_ndcg_at_3_diff1": 0.23259798801223266, + "nauc_ndcg_at_3_max": -0.573906976133018, + "nauc_ndcg_at_3_std": -0.7515043175758601, + "nauc_ndcg_at_5_diff1": 0.2390793878627882, + "nauc_ndcg_at_5_max": -0.5712510242428932, + "nauc_ndcg_at_5_std": -0.7286974701355182, + "nauc_precision_at_1000_diff1": -0.24245498329943332, + "nauc_precision_at_1000_max": 0.3291646825531801, + "nauc_precision_at_1000_std": 0.5255011870667545, + "nauc_precision_at_100_diff1": 0.13623543103107302, + "nauc_precision_at_100_max": -0.07853246653158767, + "nauc_precision_at_100_std": -0.14296193871521895, + "nauc_precision_at_10_diff1": 0.2775400329964219, + "nauc_precision_at_10_max": -0.5859260716532688, + "nauc_precision_at_10_std": -0.7212562014378959, + "nauc_precision_at_1_diff1": 0.2060503021455104, + "nauc_precision_at_1_max": -0.5281725278583708, + "nauc_precision_at_1_std": -0.7573041598137233, + "nauc_precision_at_20_diff1": 0.26665797590030293, + "nauc_precision_at_20_max": -0.5123924265023908, + "nauc_precision_at_20_std": -0.6708786566904752, + 
"nauc_precision_at_3_diff1": 0.2363702775381167, + "nauc_precision_at_3_max": -0.5803676577092874, + "nauc_precision_at_3_std": -0.7427529614771804, + "nauc_precision_at_5_diff1": 0.2456529314233418, + "nauc_precision_at_5_max": -0.573319088810008, + "nauc_precision_at_5_std": -0.7135127683771583, + "nauc_recall_at_1000_diff1": 0.38214489204648355, + "nauc_recall_at_1000_max": -0.09155063706261089, + "nauc_recall_at_1000_std": -0.022172073157980317, + "nauc_recall_at_100_diff1": 0.3802641372927062, + "nauc_recall_at_100_max": -0.3842566138245613, + "nauc_recall_at_100_std": -0.5385141624854306, + "nauc_recall_at_10_diff1": 0.3783267103081628, + "nauc_recall_at_10_max": -0.488856095520999, + "nauc_recall_at_10_std": -0.58247849954068, + "nauc_recall_at_1_diff1": 0.35392215263704485, + "nauc_recall_at_1_max": -0.47033776664547916, + "nauc_recall_at_1_std": -0.581374357285355, + "nauc_recall_at_20_diff1": 0.3567752100934632, + "nauc_recall_at_20_max": -0.4784863097401327, + "nauc_recall_at_20_std": -0.5895318668296887, + "nauc_recall_at_3_diff1": 0.35482016307799724, + "nauc_recall_at_3_max": -0.492800686077188, + "nauc_recall_at_3_std": -0.5845744958893286, + "nauc_recall_at_5_diff1": 0.36822693817594376, + "nauc_recall_at_5_max": -0.4820944786320667, + "nauc_recall_at_5_std": -0.5821080266388661, + "ndcg_at_1": 0.41429, + "ndcg_at_10": 0.43851, + "ndcg_at_100": 0.44725, + "ndcg_at_1000": 0.67745, + "ndcg_at_20": 0.42956, + "ndcg_at_3": 0.42275, + "ndcg_at_5": 0.43186, + "precision_at_1": 0.41429, + "precision_at_10": 0.44429, + "precision_at_100": 0.36357, + "precision_at_1000": 0.12124, + "precision_at_20": 0.42857, + "precision_at_3": 0.42381, + "precision_at_5": 0.43714, + "recall_at_1": 0.00597, + "recall_at_10": 0.06062, + "recall_at_100": 0.35778, + "recall_at_1000": 0.83615, + "recall_at_20": 0.11218, + "recall_at_3": 0.01773, + "recall_at_5": 0.03016 } ] }, diff --git 
a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisMediumI2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisMediumI2IRetrieval.json index 700a4a2774..3f2c633d86 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisMediumI2IRetrieval.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/RParisMediumI2IRetrieval.json @@ -1,12 +1,12 @@ { - "dataset_revision": "900267b49003a086979e8d52f6942624236bfc34", - "evaluation_time": 23.04814124107361, + "dataset_revision": "3d959815e102785efd628170281f1e65561b03d2", + "evaluation_time": 79.89404535293579, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "cv_recall_at_1": 0.98571, + "cv_recall_at_1": 1.0, "cv_recall_at_10": 1.0, "cv_recall_at_100": 1.0, "cv_recall_at_1000": 1.0, @@ -17,21 +17,21 @@ "languages": [ "eng-Latn" ], - "main_score": 0.98571, - "map_at_1": 0.0055, - "map_at_10": 0.0505, - "map_at_100": 0.32128, - "map_at_1000": 0.53905, - "map_at_20": 0.09538, - "map_at_3": 0.01586, - "map_at_5": 0.02626, - "mrr_at_1": 0.9857142857142858, - "mrr_at_10": 0.9928571428571429, - "mrr_at_100": 0.9928571428571429, - "mrr_at_1000": 0.9928571428571429, - "mrr_at_20": 0.9928571428571429, - "mrr_at_3": 0.9928571428571429, - "mrr_at_5": 0.9928571428571429, + "main_score": 1.0, + "map_at_1": 0.00556, + "map_at_10": 0.05371, + "map_at_100": 0.40433, + "map_at_1000": 0.65468, + "map_at_20": 0.10419, + "map_at_3": 0.01659, + "map_at_5": 0.02755, + "mrr_at_1": 1.0, + "mrr_at_10": 1.0, + "mrr_at_100": 1.0, + "mrr_at_1000": 1.0, + "mrr_at_20": 1.0, + "mrr_at_3": 1.0, + "mrr_at_5": 1.0, "nauc_cv_recall_at_1000_diff1": NaN, "nauc_cv_recall_at_1000_max": NaN, "nauc_cv_recall_at_1000_std": NaN, @@ -41,9 +41,9 @@ "nauc_cv_recall_at_10_diff1": NaN, "nauc_cv_recall_at_10_max": NaN, "nauc_cv_recall_at_10_std": NaN, - 
"nauc_cv_recall_at_1_diff1": 0.8692810457516342, - "nauc_cv_recall_at_1_max": -1.151727357609713, - "nauc_cv_recall_at_1_std": -0.5634920634920669, + "nauc_cv_recall_at_1_diff1": NaN, + "nauc_cv_recall_at_1_max": NaN, + "nauc_cv_recall_at_1_std": NaN, "nauc_cv_recall_at_20_diff1": NaN, "nauc_cv_recall_at_20_max": NaN, "nauc_cv_recall_at_20_std": NaN, @@ -53,132 +53,132 @@ "nauc_cv_recall_at_5_diff1": NaN, "nauc_cv_recall_at_5_max": NaN, "nauc_cv_recall_at_5_std": NaN, - "nauc_map_at_1000_diff1": -0.5202144222378728, - "nauc_map_at_1000_max": 0.11126568280766698, - "nauc_map_at_1000_std": 0.32813155264948524, - "nauc_map_at_100_diff1": -0.17082735069359992, - "nauc_map_at_100_max": -0.07951969633170079, - "nauc_map_at_100_std": 0.13798406208276, - "nauc_map_at_10_diff1": 0.08925396593997569, - "nauc_map_at_10_max": -0.078676032579074, - "nauc_map_at_10_std": 0.13021224342356594, - "nauc_map_at_1_diff1": 0.20034146659034022, - "nauc_map_at_1_max": -0.1488526539306114, - "nauc_map_at_1_std": 0.036837238597286596, - "nauc_map_at_20_diff1": 0.044029095752394776, - "nauc_map_at_20_max": -0.044115438949865464, - "nauc_map_at_20_std": 0.16589359171211718, - "nauc_map_at_3_diff1": 0.12936241106392424, - "nauc_map_at_3_max": -0.11171026414768116, - "nauc_map_at_3_std": 0.09557523635145118, - "nauc_map_at_5_diff1": 0.12090379123610429, - "nauc_map_at_5_max": -0.11656881579212829, - "nauc_map_at_5_std": 0.09921919672363319, - "nauc_mrr_at_1000_diff1": 0.8692810457516355, - "nauc_mrr_at_1000_max": -1.1517273576096931, - "nauc_mrr_at_1000_std": -0.5634920634920526, - "nauc_mrr_at_100_diff1": 0.8692810457516355, - "nauc_mrr_at_100_max": -1.1517273576096931, - "nauc_mrr_at_100_std": -0.5634920634920526, - "nauc_mrr_at_10_diff1": 0.8692810457516355, - "nauc_mrr_at_10_max": -1.1517273576096931, - "nauc_mrr_at_10_std": -0.5634920634920526, - "nauc_mrr_at_1_diff1": 0.8692810457516342, - "nauc_mrr_at_1_max": -1.151727357609713, - "nauc_mrr_at_1_std": -0.5634920634920669, - 
"nauc_mrr_at_20_diff1": 0.8692810457516355, - "nauc_mrr_at_20_max": -1.1517273576096931, - "nauc_mrr_at_20_std": -0.5634920634920526, - "nauc_mrr_at_3_diff1": 0.8692810457516355, - "nauc_mrr_at_3_max": -1.1517273576096931, - "nauc_mrr_at_3_std": -0.5634920634920526, - "nauc_mrr_at_5_diff1": 0.8692810457516355, - "nauc_mrr_at_5_max": -1.1517273576096931, - "nauc_mrr_at_5_std": -0.5634920634920526, - "nauc_ndcg_at_1000_diff1": -0.44820569134469457, - "nauc_ndcg_at_1000_max": 0.14278328260517373, - "nauc_ndcg_at_1000_std": 0.35909204639356485, - "nauc_ndcg_at_100_diff1": -0.6018888319473394, - "nauc_ndcg_at_100_max": 0.21192336225080224, - "nauc_ndcg_at_100_std": 0.25475321800547673, - "nauc_ndcg_at_10_diff1": -0.5815570895956712, - "nauc_ndcg_at_10_max": 0.2384364105122267, - "nauc_ndcg_at_10_std": 0.47588628797056887, - "nauc_ndcg_at_1_diff1": 0.8692810457516342, - "nauc_ndcg_at_1_max": -1.151727357609713, - "nauc_ndcg_at_1_std": -0.5634920634920669, - "nauc_ndcg_at_20_diff1": -0.5953841029416077, - "nauc_ndcg_at_20_max": 0.32157620990503083, - "nauc_ndcg_at_20_std": 0.47071964995241883, - "nauc_ndcg_at_3_diff1": -0.47333802126140007, - "nauc_ndcg_at_3_max": 0.13407004770022382, - "nauc_ndcg_at_3_std": 0.46679765020950414, - "nauc_ndcg_at_5_diff1": -0.5587389095148461, - "nauc_ndcg_at_5_max": 0.07339891244234566, - "nauc_ndcg_at_5_std": 0.47215237618630984, - "nauc_precision_at_1000_diff1": -0.3950971876695561, - "nauc_precision_at_1000_max": 0.24406161426957987, - "nauc_precision_at_1000_std": 0.12174947607128742, - "nauc_precision_at_100_diff1": -0.5472068296947269, - "nauc_precision_at_100_max": 0.23378022737023205, - "nauc_precision_at_100_std": 0.1829472692928344, - "nauc_precision_at_10_diff1": -0.5950517610712319, - "nauc_precision_at_10_max": 0.28672134365425656, - "nauc_precision_at_10_std": 0.48780862485384807, - "nauc_precision_at_1_diff1": 0.8692810457516342, - "nauc_precision_at_1_max": -1.151727357609713, - "nauc_precision_at_1_std": 
-0.5634920634920669, - "nauc_precision_at_20_diff1": -0.5947364591012498, - "nauc_precision_at_20_max": 0.35422908868857506, - "nauc_precision_at_20_std": 0.4705389149562903, - "nauc_precision_at_3_diff1": -0.510094916344916, - "nauc_precision_at_3_max": 0.22297297297297156, - "nauc_precision_at_3_std": 0.5162886100386108, - "nauc_precision_at_5_diff1": -0.6028283796740131, - "nauc_precision_at_5_max": 0.11490891658676922, - "nauc_precision_at_5_std": 0.504458293384468, - "nauc_recall_at_1000_diff1": -0.2460203893111549, - "nauc_recall_at_1000_max": 0.07681666757366377, - "nauc_recall_at_1000_std": 0.29402261311504047, - "nauc_recall_at_100_diff1": -0.10390008545301123, - "nauc_recall_at_100_max": -0.09447732649357599, - "nauc_recall_at_100_std": 0.0928830407002215, - "nauc_recall_at_10_diff1": 0.10590480631127752, - "nauc_recall_at_10_max": -0.08162621409653509, - "nauc_recall_at_10_std": 0.11847839880663995, - "nauc_recall_at_1_diff1": 0.20034146659034022, - "nauc_recall_at_1_max": -0.1488526539306114, - "nauc_recall_at_1_std": 0.036837238597286596, - "nauc_recall_at_20_diff1": 0.06586007946887822, - "nauc_recall_at_20_max": -0.05281307817884481, - "nauc_recall_at_20_std": 0.14970188120798883, - "nauc_recall_at_3_diff1": 0.13126830970729877, - "nauc_recall_at_3_max": -0.10596900293507791, - "nauc_recall_at_3_std": 0.09476793236811286, - "nauc_recall_at_5_diff1": 0.12983221018552543, - "nauc_recall_at_5_max": -0.11532911944953175, - "nauc_recall_at_5_std": 0.09212571294283788, - "ndcg_at_1": 0.98571, - "ndcg_at_10": 0.92841, - "ndcg_at_100": 0.77588, - "ndcg_at_1000": 0.7685, - "ndcg_at_20": 0.89894, - "ndcg_at_3": 0.95291, - "ndcg_at_5": 0.94991, - "precision_at_1": 0.98571, - "precision_at_10": 0.91571, - "precision_at_100": 0.721, - "precision_at_1000": 0.18663, - "precision_at_20": 0.88, - "precision_at_3": 0.94286, - "precision_at_5": 0.94286, - "recall_at_1": 0.0055, - "recall_at_10": 0.05165, - "recall_at_100": 0.36172, - "recall_at_1000": 0.79065, - 
"recall_at_20": 0.0987, - "recall_at_3": 0.01595, - "recall_at_5": 0.0266 + "nauc_map_at_1000_diff1": -0.37033768465220357, + "nauc_map_at_1000_max": 0.2394188102952743, + "nauc_map_at_1000_std": 0.35189302913259624, + "nauc_map_at_100_diff1": -0.0008350440689574251, + "nauc_map_at_100_max": 0.06810001616925912, + "nauc_map_at_100_std": -0.08292235454963914, + "nauc_map_at_10_diff1": 0.137914576743156, + "nauc_map_at_10_max": -0.09262735632646613, + "nauc_map_at_10_std": -0.19844704585800624, + "nauc_map_at_1_diff1": 0.18523999970603441, + "nauc_map_at_1_max": -0.1265328999786591, + "nauc_map_at_1_std": -0.230231416885371, + "nauc_map_at_20_diff1": 0.08800700279626224, + "nauc_map_at_20_max": -0.05754868196581078, + "nauc_map_at_20_std": -0.1590299889904529, + "nauc_map_at_3_diff1": 0.1740453429583345, + "nauc_map_at_3_max": -0.11523868645571757, + "nauc_map_at_3_std": -0.22138850054902404, + "nauc_map_at_5_diff1": 0.1634612763199569, + "nauc_map_at_5_max": -0.11473486161026644, + "nauc_map_at_5_std": -0.21287472489660672, + "nauc_mrr_at_1000_diff1": NaN, + "nauc_mrr_at_1000_max": NaN, + "nauc_mrr_at_1000_std": NaN, + "nauc_mrr_at_100_diff1": NaN, + "nauc_mrr_at_100_max": NaN, + "nauc_mrr_at_100_std": NaN, + "nauc_mrr_at_10_diff1": NaN, + "nauc_mrr_at_10_max": NaN, + "nauc_mrr_at_10_std": NaN, + "nauc_mrr_at_1_diff1": NaN, + "nauc_mrr_at_1_max": NaN, + "nauc_mrr_at_1_std": NaN, + "nauc_mrr_at_20_diff1": NaN, + "nauc_mrr_at_20_max": NaN, + "nauc_mrr_at_20_std": NaN, + "nauc_mrr_at_3_diff1": NaN, + "nauc_mrr_at_3_max": NaN, + "nauc_mrr_at_3_std": NaN, + "nauc_mrr_at_5_diff1": NaN, + "nauc_mrr_at_5_max": NaN, + "nauc_mrr_at_5_std": NaN, + "nauc_ndcg_at_1000_diff1": -0.22205760084759807, + "nauc_ndcg_at_1000_max": 0.10796195890373474, + "nauc_ndcg_at_1000_std": 0.26231183916047035, + "nauc_ndcg_at_100_diff1": -0.6398984813885491, + "nauc_ndcg_at_100_max": 0.4842652819900039, + "nauc_ndcg_at_100_std": 0.5107149500380104, + "nauc_ndcg_at_10_diff1": -0.9371088266265549, + 
"nauc_ndcg_at_10_max": 0.6084341828001775, + "nauc_ndcg_at_10_std": 0.5430117586762949, + "nauc_ndcg_at_1_diff1": NaN, + "nauc_ndcg_at_1_max": NaN, + "nauc_ndcg_at_1_std": NaN, + "nauc_ndcg_at_20_diff1": -0.9268541652920639, + "nauc_ndcg_at_20_max": 0.597237167140367, + "nauc_ndcg_at_20_std": 0.5649258862662437, + "nauc_ndcg_at_3_diff1": -0.8999624476329307, + "nauc_ndcg_at_3_max": 1.0, + "nauc_ndcg_at_3_std": 0.6408995549373118, + "nauc_ndcg_at_5_diff1": -1.1761404982018615, + "nauc_ndcg_at_5_max": 0.8247853976989885, + "nauc_ndcg_at_5_std": 0.7965821461413944, + "nauc_precision_at_1000_diff1": -0.31783317612679535, + "nauc_precision_at_1000_max": 0.1930712065318718, + "nauc_precision_at_1000_std": 0.4208950298526623, + "nauc_precision_at_100_diff1": -0.539592973425393, + "nauc_precision_at_100_max": 0.4460055144887294, + "nauc_precision_at_100_std": 0.4485043771568464, + "nauc_precision_at_10_diff1": -0.9108685686074025, + "nauc_precision_at_10_max": 0.5802094720880185, + "nauc_precision_at_10_std": 0.5333427372430083, + "nauc_precision_at_1_diff1": NaN, + "nauc_precision_at_1_max": NaN, + "nauc_precision_at_1_std": NaN, + "nauc_precision_at_20_diff1": -0.9101954663166522, + "nauc_precision_at_20_max": 0.5867536982510865, + "nauc_precision_at_20_std": 0.5645109794600713, + "nauc_precision_at_3_diff1": -0.8266417678182514, + "nauc_precision_at_3_max": 1.0, + "nauc_precision_at_3_std": 0.6095549330843374, + "nauc_precision_at_5_diff1": -1.1852629940865032, + "nauc_precision_at_5_max": 0.8047774665421796, + "nauc_precision_at_5_std": 0.8047774665421796, + "nauc_recall_at_1000_diff1": -0.01696359441028296, + "nauc_recall_at_1000_max": -0.06214643338578423, + "nauc_recall_at_1000_std": 0.12422436451417722, + "nauc_recall_at_100_diff1": 0.05923389523513676, + "nauc_recall_at_100_max": 0.023919554920948157, + "nauc_recall_at_100_std": -0.13407391685199146, + "nauc_recall_at_10_diff1": 0.14007744935778899, + "nauc_recall_at_10_max": -0.09598247717834782, + 
"nauc_recall_at_10_std": -0.19995201976434931, + "nauc_recall_at_1_diff1": 0.18523999970603441, + "nauc_recall_at_1_max": -0.1265328999786591, + "nauc_recall_at_1_std": -0.230231416885371, + "nauc_recall_at_20_diff1": 0.09732767803178609, + "nauc_recall_at_20_max": -0.06812531189724261, + "nauc_recall_at_20_std": -0.16830905580000974, + "nauc_recall_at_3_diff1": 0.1740453429583345, + "nauc_recall_at_3_max": -0.11523868645571757, + "nauc_recall_at_3_std": -0.22138850054902404, + "nauc_recall_at_5_diff1": 0.16407715576816015, + "nauc_recall_at_5_max": -0.11553420065887736, + "nauc_recall_at_5_std": -0.2140734184274355, + "ndcg_at_1": 1.0, + "ndcg_at_10": 0.97012, + "ndcg_at_100": 0.87092, + "ndcg_at_1000": 0.84355, + "ndcg_at_20": 0.95286, + "ndcg_at_3": 0.98907, + "ndcg_at_5": 0.98605, + "precision_at_1": 1.0, + "precision_at_10": 0.96143, + "precision_at_100": 0.821, + "precision_at_1000": 0.20586, + "precision_at_20": 0.94143, + "precision_at_3": 0.98571, + "precision_at_5": 0.98286, + "recall_at_1": 0.00556, + "recall_at_10": 0.05391, + "recall_at_100": 0.42614, + "recall_at_1000": 0.86053, + "recall_at_20": 0.10535, + "recall_at_3": 0.01659, + "recall_at_5": 0.02758 } ] }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/model_meta.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/model_meta.json index cdfeb8c90d..0b09a17fae 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/model_meta.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/model_meta.json @@ -1 +1 @@ -{"name": "openai/clip-vit-base-patch32", "revision": "3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268", "release_date": "2021-02-26", "languages": ["eng_Latn"], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_source": true, "similarity_fn_name": null, "framework": [], 
"loader": "CLIPModelWrapper"} \ No newline at end of file +{"name": "openai/clip-vit-base-patch32", "revision": "3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268", "release_date": "2021-02-26", "languages": ["eng_Latn"], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_weights": null, "public_training_data": null, "public_training_code": null, "framework": [], "reference": null, "similarity_fn_name": null, "use_instuctions": null, "zero_shot_benchmarks": null, "loader": "CLIPModelWrapper"} \ No newline at end of file From ac51be8755e984c3001f22541bca24733b06665b Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 7 Nov 2024 13:54:18 +0000 Subject: [PATCH 68/73] fix: add/remove subtasks from BLINKIT2IMultiChoice and BLINKIT2TMultiChoice --- mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py | 2 +- mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py index d600eaa4f2..695f3f6a79 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py @@ -11,7 +11,7 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): reference="https://arxiv.org/abs/2404.12390", dataset={ "path": "JamieSJS/blink-it2i-multi", - "revision": "780ade70cd769e586502a61dda903e525f945a45", + "revision": "b7b46b72d1ed1fa44d25e2b9c4726afab4a7ce53", "trust_remote_code": True, }, type="Any2AnyMultiChoice", diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py index fe37216de0..d093d75fdf 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py @@ -11,7 +11,7 @@ class 
BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): reference="https://arxiv.org/abs/2404.12390", dataset={ "path": "JamieSJS/blink-it2t-multi", - "revision": "b6e18eba186cada040ddb72e8e3cb92edd7ca5e9", + "revision": "ae713b03ae68e343f16c3bcdbd1b1ee760975d55", }, type="Any2AnyMultiChoice", category="it2t", From dca764a092f304b5d4a26bb9d25ff4e16ca7648b Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 7 Nov 2024 13:55:15 +0000 Subject: [PATCH 69/73] update blink metadata --- .../Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py | 6 +++--- .../Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py index 695f3f6a79..98b0a0120b 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py @@ -35,13 +35,13 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 402}, + "n_samples": {"test": 534}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 804, - "num_queries": 402, + "num_documents": 1200, + "num_queries": 534, "average_relevant_docs_per_query": 1, } }, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py index d093d75fdf..60f42b8b05 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py @@ -34,13 +34,13 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 1073}, + "n_samples": {"test": 923}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 26, - 
"num_queries": 1073, + "num_documents": 24, + "num_queries": 923, "average_relevant_docs_per_query": 1, } }, From 2d371c7da8692aa3ae04462071644f25f65995bf Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 7 Nov 2024 14:00:54 +0000 Subject: [PATCH 70/73] add updated BLINK results --- .../BLINKIT2IMultiChoice.json | 36 +++++++++---------- .../BLINKIT2TMultiChoice.json | 36 +++++++++---------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json index 648d3aa59e..6b300b8219 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json @@ -1,31 +1,31 @@ { - "dataset_revision": "780ade70cd769e586502a61dda903e525f945a45", - "evaluation_time": 56.62301731109619, + "dataset_revision": "b7b46b72d1ed1fa44d25e2b9c4726afab4a7ce53", + "evaluation_time": 30.517717123031616, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "accuracy": 0.70149, + "accuracy": 0.6161, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.70149, - "mrr_at_1": 0.7014925373134329, - "mrr_at_10": 0.8507462686567164, - "mrr_at_100": 0.8507462686567164, - "mrr_at_1000": 0.8507462686567164, - "mrr_at_20": 0.8507462686567164, - "mrr_at_3": 0.8507462686567164, - "mrr_at_5": 0.8507462686567164, - "ndcg_at_1": 0.70149, - "ndcg_at_10": 0.88983, - "ndcg_at_100": 0.88983, - "ndcg_at_1000": 0.88983, - "ndcg_at_20": 0.88983, - "ndcg_at_3": 0.88983, - "ndcg_at_5": 0.88983 + "main_score": 0.6161, + "mrr_at_1": 0.6161048689138576, + "mrr_at_10": 0.7762172284644191, + "mrr_at_100": 0.7762172284644191, + "mrr_at_1000": 0.7762172284644191, + "mrr_at_20": 
0.7762172284644191, + "mrr_at_3": 0.7762172284644191, + "mrr_at_5": 0.7762172284644191, + "ndcg_at_1": 0.6161, + "ndcg_at_10": 0.81993, + "ndcg_at_100": 0.81993, + "ndcg_at_1000": 0.81993, + "ndcg_at_20": 0.81993, + "ndcg_at_3": 0.81993, + "ndcg_at_5": 0.81993 } ] }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json index bda6cd2cb9..55aca4a70e 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json @@ -1,31 +1,31 @@ { - "dataset_revision": "b6e18eba186cada040ddb72e8e3cb92edd7ca5e9", - "evaluation_time": 43.71325731277466, + "dataset_revision": "ae713b03ae68e343f16c3bcdbd1b1ee760975d55", + "evaluation_time": 10.223464965820312, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "accuracy": 0.38397, + "accuracy": 0.36945, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.38397, - "mrr_at_1": 0.38397017707362535, - "mrr_at_10": 0.650512581547066, - "mrr_at_100": 0.650512581547066, - "mrr_at_1000": 0.650512581547066, - "mrr_at_20": 0.650512581547066, - "mrr_at_3": 0.6295433364398889, - "mrr_at_5": 0.650512581547066, - "ndcg_at_1": 0.38397, - "ndcg_at_10": 0.73974, - "ndcg_at_100": 0.73974, - "ndcg_at_1000": 0.73974, - "ndcg_at_20": 0.73974, - "ndcg_at_3": 0.70361, - "ndcg_at_5": 0.73974 + "main_score": 0.36945, + "mrr_at_1": 0.3694474539544962, + "mrr_at_10": 0.6365113759479949, + "mrr_at_100": 0.6365113759479949, + "mrr_at_1000": 0.6365113759479949, + "mrr_at_20": 0.6365113759479949, + "mrr_at_3": 0.61213434452871, + "mrr_at_5": 0.6365113759479949, + "ndcg_at_1": 0.36945, + "ndcg_at_10": 0.72903, + "ndcg_at_100": 0.72903, + "ndcg_at_1000": 0.72903, + 
"ndcg_at_20": 0.72903, + "ndcg_at_3": 0.68704, + "ndcg_at_5": 0.72903 } ] }, From 7e69e166287baa097244c1827f3f9b7eb8698d10 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Wed, 13 Nov 2024 15:19:41 +0000 Subject: [PATCH 71/73] merge upstream mieb --- .../Flickr30kI2TRetrieval.json | 186 ------------------ .../Flickr30kT2IRetrieval.json | 186 ------------------ 2 files changed, 372 deletions(-) delete mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kI2TRetrieval.json delete mode 100644 results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kT2IRetrieval.json diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kI2TRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kI2TRetrieval.json deleted file mode 100644 index 714a658b14..0000000000 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kI2TRetrieval.json +++ /dev/null @@ -1,186 +0,0 @@ -{ - "dataset_revision": "a4cf34ac79215f9e2cd6a10342d84f606fc41cc3", - "evaluation_time": 2905.3177580833435, - "kg_co2_emissions": null, - "mteb_version": "1.12.90", - "scores": { - "test": [ - { - "cv_recall_at_1": 0.4111, - "cv_recall_at_10": 0.74505, - "cv_recall_at_100": 0.94796, - "cv_recall_at_1000": 0.99607, - "cv_recall_at_20": 0.82443, - "cv_recall_at_3": 0.58196, - "cv_recall_at_5": 0.65757, - "hf_subset": "default", - "languages": [ - "eng-Latn" - ], - "main_score": 0.33866, - "map_at_1": 0.08222, - "map_at_10": 0.2347, - "map_at_100": 0.26833, - "map_at_1000": 0.27226, - "map_at_20": 0.25195, - "map_at_3": 0.16585, - "map_at_5": 0.20389, - "mrr_at_1": 0.4111046624105243, - "mrr_at_10": 0.5151111351658093, - "mrr_at_100": 0.5239514165075607, - "mrr_at_1000": 0.5242073041502724, - "mrr_at_20": 0.5206470443576335, - "mrr_at_3": 0.4860761806496125, - "mrr_at_5": 
0.503347412996305, - "nauc_cv_recall_at_1000_diff1": 0.18030127835739224, - "nauc_cv_recall_at_1000_max": 0.5581466344401298, - "nauc_cv_recall_at_1000_std": 0.8631168401994536, - "nauc_cv_recall_at_100_diff1": 0.272670704205089, - "nauc_cv_recall_at_100_max": 0.4612079444879557, - "nauc_cv_recall_at_100_std": 0.5695575488041834, - "nauc_cv_recall_at_10_diff1": 0.28543669447055997, - "nauc_cv_recall_at_10_max": 0.36347027563947776, - "nauc_cv_recall_at_10_std": 0.2385430096241168, - "nauc_cv_recall_at_1_diff1": 0.3784400483602523, - "nauc_cv_recall_at_1_max": 0.3504797969786945, - "nauc_cv_recall_at_1_std": 0.10634115715764078, - "nauc_cv_recall_at_20_diff1": 0.27508492044537997, - "nauc_cv_recall_at_20_max": 0.3714030729491561, - "nauc_cv_recall_at_20_std": 0.2959015251319611, - "nauc_cv_recall_at_3_diff1": 0.31233109201191445, - "nauc_cv_recall_at_3_max": 0.35059358000714524, - "nauc_cv_recall_at_3_std": 0.15142403433656834, - "nauc_cv_recall_at_5_diff1": 0.2974464195367057, - "nauc_cv_recall_at_5_max": 0.35509762440464737, - "nauc_cv_recall_at_5_std": 0.18640081229405794, - "nauc_map_at_1000_diff1": 0.2662124495945085, - "nauc_map_at_1000_max": 0.40406570060222413, - "nauc_map_at_1000_std": 0.1959460147481183, - "nauc_map_at_100_diff1": 0.2659471804759087, - "nauc_map_at_100_max": 0.40363986652542505, - "nauc_map_at_100_std": 0.1947488241847008, - "nauc_map_at_10_diff1": 0.2666647469717836, - "nauc_map_at_10_max": 0.39968979149613193, - "nauc_map_at_10_std": 0.16723592929025385, - "nauc_map_at_1_diff1": 0.37844004836025164, - "nauc_map_at_1_max": 0.3504797969786941, - "nauc_map_at_1_std": 0.1063411571576403, - "nauc_map_at_20_diff1": 0.2650108128343263, - "nauc_map_at_20_max": 0.4008348432286667, - "nauc_map_at_20_std": 0.17971414027637928, - "nauc_map_at_3_diff1": 0.28590676523718034, - "nauc_map_at_3_max": 0.3781596069511839, - "nauc_map_at_3_std": 0.13040101399163928, - "nauc_map_at_5_diff1": 0.27462944218025, - "nauc_map_at_5_max": 0.39938962890452623, - 
"nauc_map_at_5_std": 0.14980070399423312, - "nauc_mrr_at_1000_diff1": 0.3494548271148969, - "nauc_mrr_at_1000_max": 0.3521197992435183, - "nauc_mrr_at_1000_std": 0.1344856075273458, - "nauc_mrr_at_100_diff1": 0.349406446391289, - "nauc_mrr_at_100_max": 0.35218688878029347, - "nauc_mrr_at_100_std": 0.1347720116595227, - "nauc_mrr_at_10_diff1": 0.34832734062778187, - "nauc_mrr_at_10_max": 0.35184655254909514, - "nauc_mrr_at_10_std": 0.13412235150155113, - "nauc_mrr_at_1_diff1": 0.3784400483602523, - "nauc_mrr_at_1_max": 0.3504797969786945, - "nauc_mrr_at_1_std": 0.10634115715764078, - "nauc_mrr_at_20_diff1": 0.34874544122097734, - "nauc_mrr_at_20_max": 0.35198594125683075, - "nauc_mrr_at_20_std": 0.1348277834635207, - "nauc_mrr_at_3_diff1": 0.3502458632268556, - "nauc_mrr_at_3_max": 0.3503121014809114, - "nauc_mrr_at_3_std": 0.12461253730902709, - "nauc_mrr_at_5_diff1": 0.3483495916749157, - "nauc_mrr_at_5_max": 0.3512186490276025, - "nauc_mrr_at_5_std": 0.13065045260516017, - "nauc_ndcg_at_1000_diff1": 0.28085910013736953, - "nauc_ndcg_at_1000_max": 0.400173338146715, - "nauc_ndcg_at_1000_std": 0.2454880347293459, - "nauc_ndcg_at_100_diff1": 0.27614261400212725, - "nauc_ndcg_at_100_max": 0.3975804199137475, - "nauc_ndcg_at_100_std": 0.24550705671790238, - "nauc_ndcg_at_10_diff1": 0.2770040634103947, - "nauc_ndcg_at_10_max": 0.386892601870249, - "nauc_ndcg_at_10_std": 0.17593863508925003, - "nauc_ndcg_at_1_diff1": 0.3784400483602523, - "nauc_ndcg_at_1_max": 0.3504797969786945, - "nauc_ndcg_at_1_std": 0.10634115715764078, - "nauc_ndcg_at_20_diff1": 0.27393587281733567, - "nauc_ndcg_at_20_max": 0.3893030504620739, - "nauc_ndcg_at_20_std": 0.1976666061871801, - "nauc_ndcg_at_3_diff1": 0.29659323480247735, - "nauc_ndcg_at_3_max": 0.3710317896505282, - "nauc_ndcg_at_3_std": 0.1331916818585782, - "nauc_ndcg_at_5_diff1": 0.28580433201485833, - "nauc_ndcg_at_5_max": 0.3859484521144905, - "nauc_ndcg_at_5_std": 0.15313433521572764, - "nauc_precision_at_1000_diff1": 
0.12752694892887595, - "nauc_precision_at_1000_max": 0.3590535825037547, - "nauc_precision_at_1000_std": 0.6227671512776303, - "nauc_precision_at_100_diff1": 0.18161894144710583, - "nauc_precision_at_100_max": 0.3507884567772516, - "nauc_precision_at_100_std": 0.39162073570098294, - "nauc_precision_at_10_diff1": 0.22500485735582146, - "nauc_precision_at_10_max": 0.37378539642037556, - "nauc_precision_at_10_std": 0.2035430362585049, - "nauc_precision_at_1_diff1": 0.3784400483602523, - "nauc_precision_at_1_max": 0.3504797969786945, - "nauc_precision_at_1_std": 0.10634115715764078, - "nauc_precision_at_20_diff1": 0.20677311842444712, - "nauc_precision_at_20_max": 0.3598552830533254, - "nauc_precision_at_20_std": 0.24219283310766115, - "nauc_precision_at_3_diff1": 0.26529469191714483, - "nauc_precision_at_3_max": 0.37325220096807005, - "nauc_precision_at_3_std": 0.14099019547945105, - "nauc_precision_at_5_diff1": 0.24620492543282482, - "nauc_precision_at_5_max": 0.3867515487955738, - "nauc_precision_at_5_std": 0.16846852569552279, - "nauc_recall_at_1000_diff1": 0.1275269489288761, - "nauc_recall_at_1000_max": 0.35905358250375646, - "nauc_recall_at_1000_std": 0.6227671512776317, - "nauc_recall_at_100_diff1": 0.1816189414471066, - "nauc_recall_at_100_max": 0.35078845677725146, - "nauc_recall_at_100_std": 0.3916207357009826, - "nauc_recall_at_10_diff1": 0.22500485735582146, - "nauc_recall_at_10_max": 0.37378539642037556, - "nauc_recall_at_10_std": 0.2035430362585049, - "nauc_recall_at_1_diff1": 0.37844004836025164, - "nauc_recall_at_1_max": 0.3504797969786941, - "nauc_recall_at_1_std": 0.1063411571576403, - "nauc_recall_at_20_diff1": 0.20677311842444712, - "nauc_recall_at_20_max": 0.3598552830533254, - "nauc_recall_at_20_std": 0.24219283310766115, - "nauc_recall_at_3_diff1": 0.2652946919171448, - "nauc_recall_at_3_max": 0.3732522009680695, - "nauc_recall_at_3_std": 0.1409901954794505, - "nauc_recall_at_5_diff1": 0.24620492543282482, - "nauc_recall_at_5_max": 
0.3867515487955738, - "nauc_recall_at_5_std": 0.16846852569552279, - "ndcg_at_1": 0.4111, - "ndcg_at_10": 0.33866, - "ndcg_at_100": 0.44523, - "ndcg_at_1000": 0.49782, - "ndcg_at_20": 0.37695, - "ndcg_at_3": 0.33845, - "ndcg_at_5": 0.29078, - "precision_at_1": 0.4111, - "precision_at_10": 0.17182, - "precision_at_100": 0.03266, - "precision_at_1000": 0.00451, - "precision_at_20": 0.10825, - "precision_at_3": 0.31717, - "precision_at_5": 0.25644, - "recall_at_1": 0.08222, - "recall_at_10": 0.34363, - "recall_at_100": 0.65316, - "recall_at_1000": 0.90261, - "recall_at_20": 0.43299, - "recall_at_3": 0.1903, - "recall_at_5": 0.25644 - } - ] - }, - "task_name": "Flickr30kI2TRetrieval" -} \ No newline at end of file diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kT2IRetrieval.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kT2IRetrieval.json deleted file mode 100644 index 60797e4e6b..0000000000 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/Flickr30kT2IRetrieval.json +++ /dev/null @@ -1,186 +0,0 @@ -{ - "dataset_revision": "a4cf34ac79215f9e2cd6a10342d84f606fc41cc3", - "evaluation_time": 3504.4338762760162, - "kg_co2_emissions": null, - "mteb_version": "1.12.90", - "scores": { - "test": [ - { - "cv_recall_at_1": 0.2175, - "cv_recall_at_10": 0.50718, - "cv_recall_at_100": 0.80839, - "cv_recall_at_1000": 0.96492, - "cv_recall_at_20": 0.60324, - "cv_recall_at_3": 0.34774, - "cv_recall_at_5": 0.41235, - "hf_subset": "default", - "languages": [ - "eng-Latn" - ], - "main_score": 0.3504, - "map_at_1": 0.2175, - "map_at_10": 0.3017, - "map_at_100": 0.31355, - "map_at_1000": 0.31429, - "map_at_20": 0.30838, - "map_at_3": 0.2743, - "map_at_5": 0.28901, - "mrr_at_1": 0.21749532469207455, - "mrr_at_10": 0.3017006349410669, - "mrr_at_100": 0.3135464607899773, - "mrr_at_1000": 0.31428602518127013, - "mrr_at_20": 0.30838107727990743, - 
"mrr_at_3": 0.2743008533780535, - "mrr_at_5": 0.28900711506631577, - "nauc_cv_recall_at_1000_diff1": 0.2632892740429544, - "nauc_cv_recall_at_1000_max": 0.4683201344263211, - "nauc_cv_recall_at_1000_std": 0.7033795380897453, - "nauc_cv_recall_at_100_diff1": 0.2647647634897656, - "nauc_cv_recall_at_100_max": 0.34490671196475553, - "nauc_cv_recall_at_100_std": 0.3944496412003796, - "nauc_cv_recall_at_10_diff1": 0.2995130617977379, - "nauc_cv_recall_at_10_max": 0.28870905172779393, - "nauc_cv_recall_at_10_std": 0.13919504988489, - "nauc_cv_recall_at_1_diff1": 0.46334150143887837, - "nauc_cv_recall_at_1_max": 0.3188934433784608, - "nauc_cv_recall_at_1_std": 0.03870010544883344, - "nauc_cv_recall_at_20_diff1": 0.2830079535289093, - "nauc_cv_recall_at_20_max": 0.29612199469809664, - "nauc_cv_recall_at_20_std": 0.19159692443206613, - "nauc_cv_recall_at_3_diff1": 0.3535295484138601, - "nauc_cv_recall_at_3_max": 0.29806929455574155, - "nauc_cv_recall_at_3_std": 0.07658894036209522, - "nauc_cv_recall_at_5_diff1": 0.32210572836847934, - "nauc_cv_recall_at_5_max": 0.2888711825797017, - "nauc_cv_recall_at_5_std": 0.09843675944719517, - "nauc_map_at_1000_diff1": 0.39832464563018605, - "nauc_map_at_1000_max": 0.3074377337641309, - "nauc_map_at_1000_std": 0.07177029946282956, - "nauc_map_at_100_diff1": 0.3980484146317034, - "nauc_map_at_100_max": 0.3074089025502927, - "nauc_map_at_100_std": 0.0721090495580087, - "nauc_map_at_10_diff1": 0.39759464570461306, - "nauc_map_at_10_max": 0.3059467184032683, - "nauc_map_at_10_std": 0.06729002523587067, - "nauc_map_at_1_diff1": 0.46334150143887837, - "nauc_map_at_1_max": 0.3188934433784608, - "nauc_map_at_1_std": 0.03870010544883344, - "nauc_map_at_20_diff1": 0.39724585077726443, - "nauc_map_at_20_max": 0.30662830181613326, - "nauc_map_at_20_std": 0.07029104503264444, - "nauc_map_at_3_diff1": 0.4087813508135372, - "nauc_map_at_3_max": 0.3082838424431037, - "nauc_map_at_3_std": 0.05612029637486883, - "nauc_map_at_5_diff1": 
0.40044699102255726, - "nauc_map_at_5_max": 0.3058538687154621, - "nauc_map_at_5_std": 0.06168460440199823, - "nauc_mrr_at_1000_diff1": 0.3983251214518209, - "nauc_mrr_at_1000_max": 0.3074398467997274, - "nauc_mrr_at_1000_std": 0.0717737083834719, - "nauc_mrr_at_100_diff1": 0.3980488893862368, - "nauc_mrr_at_100_max": 0.3074110112149868, - "nauc_mrr_at_100_std": 0.07211245146163399, - "nauc_mrr_at_10_diff1": 0.39759509662724174, - "nauc_mrr_at_10_max": 0.3059487942694512, - "nauc_mrr_at_10_std": 0.06729335845497784, - "nauc_mrr_at_1_diff1": 0.46334150143887837, - "nauc_mrr_at_1_max": 0.3188934433784608, - "nauc_mrr_at_1_std": 0.03870010544883344, - "nauc_mrr_at_20_diff1": 0.3972463051807796, - "nauc_mrr_at_20_max": 0.30663039310948675, - "nauc_mrr_at_20_std": 0.07029440252517179, - "nauc_mrr_at_3_diff1": 0.4087846165854502, - "nauc_mrr_at_3_max": 0.3082911152291177, - "nauc_mrr_at_3_std": 0.0561317481536657, - "nauc_mrr_at_5_diff1": 0.40044780533219726, - "nauc_mrr_at_5_max": 0.30585567761349425, - "nauc_mrr_at_5_std": 0.061687450057817825, - "nauc_ndcg_at_1000_diff1": 0.3822651584130574, - "nauc_ndcg_at_1000_max": 0.3096027445146429, - "nauc_ndcg_at_1000_std": 0.09905420670861774, - "nauc_ndcg_at_100_diff1": 0.3751136878938184, - "nauc_ndcg_at_100_max": 0.3096593022400737, - "nauc_ndcg_at_100_std": 0.1114725296151852, - "nauc_ndcg_at_10_diff1": 0.3722441527278821, - "nauc_ndcg_at_10_max": 0.30140653687780977, - "nauc_ndcg_at_10_std": 0.08492137004866748, - "nauc_ndcg_at_1_diff1": 0.46334150143887837, - "nauc_ndcg_at_1_max": 0.3188934433784608, - "nauc_ndcg_at_1_std": 0.03870010544883344, - "nauc_ndcg_at_20_diff1": 0.3702205468461526, - "nauc_ndcg_at_20_max": 0.30363863160972876, - "nauc_ndcg_at_20_std": 0.09630513131602707, - "nauc_ndcg_at_3_diff1": 0.39360911171689245, - "nauc_ndcg_at_3_max": 0.30546309152189277, - "nauc_ndcg_at_3_std": 0.061672579869672305, - "nauc_ndcg_at_5_diff1": 0.37922470007538506, - "nauc_ndcg_at_5_max": 0.3012774286231696, - 
"nauc_ndcg_at_5_std": 0.07144813701782884, - "nauc_precision_at_1000_diff1": 0.2632892740429434, - "nauc_precision_at_1000_max": 0.46832013442631265, - "nauc_precision_at_1000_std": 0.7033795380897392, - "nauc_precision_at_100_diff1": 0.26476476348976524, - "nauc_precision_at_100_max": 0.3449067119647543, - "nauc_precision_at_100_std": 0.3944496412003801, - "nauc_precision_at_10_diff1": 0.2995130617977381, - "nauc_precision_at_10_max": 0.28870905172779415, - "nauc_precision_at_10_std": 0.13919504988489012, - "nauc_precision_at_1_diff1": 0.46334150143887837, - "nauc_precision_at_1_max": 0.3188934433784608, - "nauc_precision_at_1_std": 0.03870010544883344, - "nauc_precision_at_20_diff1": 0.2830079535289092, - "nauc_precision_at_20_max": 0.29612199469809636, - "nauc_precision_at_20_std": 0.19159692443206594, - "nauc_precision_at_3_diff1": 0.3535203790281254, - "nauc_precision_at_3_max": 0.29804922704221115, - "nauc_precision_at_3_std": 0.07655745610155888, - "nauc_precision_at_5_diff1": 0.32210572836847917, - "nauc_precision_at_5_max": 0.2888711825797015, - "nauc_precision_at_5_std": 0.09843675944719478, - "nauc_recall_at_1000_diff1": 0.2632892740429544, - "nauc_recall_at_1000_max": 0.4683201344263211, - "nauc_recall_at_1000_std": 0.7033795380897453, - "nauc_recall_at_100_diff1": 0.2647647634897656, - "nauc_recall_at_100_max": 0.34490671196475553, - "nauc_recall_at_100_std": 0.3944496412003796, - "nauc_recall_at_10_diff1": 0.2995130617977379, - "nauc_recall_at_10_max": 0.28870905172779393, - "nauc_recall_at_10_std": 0.13919504988489, - "nauc_recall_at_1_diff1": 0.46334150143887837, - "nauc_recall_at_1_max": 0.3188934433784608, - "nauc_recall_at_1_std": 0.03870010544883344, - "nauc_recall_at_20_diff1": 0.2830079535289093, - "nauc_recall_at_20_max": 0.29612199469809664, - "nauc_recall_at_20_std": 0.19159692443206613, - "nauc_recall_at_3_diff1": 0.3535203790281251, - "nauc_recall_at_3_max": 0.29804922704221065, - "nauc_recall_at_3_std": 0.07655745610155873, - 
"nauc_recall_at_5_diff1": 0.32210572836847934, - "nauc_recall_at_5_max": 0.2888711825797017, - "nauc_recall_at_5_std": 0.09843675944719517, - "ndcg_at_1": 0.2175, - "ndcg_at_10": 0.3504, - "ndcg_at_100": 0.41258, - "ndcg_at_1000": 0.43242, - "ndcg_at_20": 0.3747, - "ndcg_at_3": 0.29313, - "ndcg_at_5": 0.31969, - "precision_at_1": 0.2175, - "precision_at_10": 0.05072, - "precision_at_100": 0.00808, - "precision_at_1000": 0.00096, - "precision_at_20": 0.03016, - "precision_at_3": 0.11591, - "precision_at_5": 0.08247, - "recall_at_1": 0.2175, - "recall_at_10": 0.50718, - "recall_at_100": 0.80839, - "recall_at_1000": 0.96492, - "recall_at_20": 0.60324, - "recall_at_3": 0.34773, - "recall_at_5": 0.41235 - } - ] - }, - "task_name": "Flickr30kT2IRetrieval" -} \ No newline at end of file From 460a2b8f42bb2c67d8772aee115128f91d403712 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 14 Nov 2024 10:50:25 +0000 Subject: [PATCH 72/73] change Flickr30k to test split --- mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py | 2 +- mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py index 3a33733e1f..6baea82b5b 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py @@ -11,7 +11,7 @@ class Flickr30kI2TRetrieval(AbsTaskAny2AnyRetrieval): reference="https://www.semanticscholar.org/paper/From-image-descriptions-to-visual-denotations%3A-New-Young-Lai/44040913380206991b1991daf1192942e038fe31", dataset={ "path": "JamieSJS/flickr30k", - "revision": "a4cf34ac79215f9e2cd6a10342d84f606fc41cc3", + "revision": "5e89d348fc1cb7be65c80f5a7325236ed0ec572f", }, type="Any2AnyRetrieval", category="i2t", diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py 
b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py index 585fcfc255..5209264bd3 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py @@ -11,7 +11,7 @@ class Flickr30kT2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://www.semanticscholar.org/paper/From-image-descriptions-to-visual-denotations%3A-New-Young-Lai/44040913380206991b1991daf1192942e038fe31", dataset={ "path": "JamieSJS/flickr30k", - "revision": "a4cf34ac79215f9e2cd6a10342d84f606fc41cc3", + "revision": "5e89d348fc1cb7be65c80f5a7325236ed0ec572f", }, type="Any2AnyRetrieval", category="t2i", From bada500889aca984798d0b637c5c492163066f58 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling Date: Thu, 14 Nov 2024 11:06:26 +0000 Subject: [PATCH 73/73] change flickr to test split --- mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py | 2 +- mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py index 6baea82b5b..6ba591cf12 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py @@ -11,7 +11,7 @@ class Flickr30kI2TRetrieval(AbsTaskAny2AnyRetrieval): reference="https://www.semanticscholar.org/paper/From-image-descriptions-to-visual-denotations%3A-New-Young-Lai/44040913380206991b1991daf1192942e038fe31", dataset={ "path": "JamieSJS/flickr30k", - "revision": "5e89d348fc1cb7be65c80f5a7325236ed0ec572f", + "revision": "24acb2d0b72e18b03388eb20a6225983c0e3f629", }, type="Any2AnyRetrieval", category="i2t", diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py index 5209264bd3..be56a554e4 100644 --- 
a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py @@ -11,7 +11,7 @@ class Flickr30kT2IRetrieval(AbsTaskAny2AnyRetrieval): reference="https://www.semanticscholar.org/paper/From-image-descriptions-to-visual-denotations%3A-New-Young-Lai/44040913380206991b1991daf1192942e038fe31", dataset={ "path": "JamieSJS/flickr30k", - "revision": "5e89d348fc1cb7be65c80f5a7325236ed0ec572f", + "revision": "24acb2d0b72e18b03388eb20a6225983c0e3f629", }, type="Any2AnyRetrieval", category="t2i",