From cdb92c659bb7d3d0402c88c9e9c69f2d71b221e1 Mon Sep 17 00:00:00 2001 From: Jamie-Stirling <36764530+Jamie-Stirling@users.noreply.github.com> Date: Thu, 7 Nov 2024 17:45:21 +0000 Subject: [PATCH] [mieb] Update subtasks of BLINKIT2TMultiChoice and BLINKIT2IMultiChoice (#1403) * wip: start adding BLIP models * add other blip variants * wip: add blip2_models.py * make lint * wip: implement blip2 wrapper * feat: add blip2 models, still mismatched names * fix: remove projections from image and text embeddings * make lint * wip: add coco BLIP2 * fix: BLIP2 better zero-shot classification without text_proj and vision_proj * tidy blip2 * add imagenet-dog-15 dataset * tidy and lint * remove unused import * add cluster_accuracy, ari and nmi to Image.ClusteringEvaluator * add imagenet-10 clustering task * add SOPI2IRetrieval * add results for clip on ImageNet10Clustering and ImageNetDog15Clustering * add SOPI2IRetrieval results for clip 32 * add results for clip vit 32/SOPI2IRetrieval * resolve conflict * add RP2kI2IRetrieval dataset * add RP2kI2IRetrieval results with clip-vit-base-patch32 * update image retrieval __init__.py * fix ImageTextPair dataloading for large datasets; more compositionality evaluation datasets * add RP2kI2IRetrieval and METI2IRetrieval * add METI2IRetrieval * add SOP results * make lint * new revision for METI2IRetrieval * make lint * reset corpus chunk size * remove wrong classification import * add Flickr30k T2I and I2T * add Flickr30k T2I retrieval * reduced-size MET revision * fix: add Flickr30k T2I * make lint * add two landmark datasets and results * add Sketchy i2i retrieval * add task metadata * add BLINKIT2IRetrieval dataset * add BLINKIT2TRetrieval * add ImageCoDeT2IRetrieval * make lint * add vizwiz retrieval and results * fix vizwiz duplicate texts * add new vizwiz results * add VQA2 results * add GLD v2 I2T retrieval * add gld v2 i2i retrieval * make lint * add AbsTaskAny2AnyMultiChoice * make lint * remove GLDv2I2IRetrieval * exclude 
AbsTaskAny2AnyMultiChoice from test_load_data * fix e5v&vista * remove duplicate corpus entries from BLINKIT2TRetrieval dataset * task type fix for running tasks * update BLINKIT2T metadata * fix wrong meta * run mieb script * split ROxford, RParis into easy, medium and hard * make lint * add BLINK as multi choice tasks * fix: license metadata in wrong format * remove null examples from corpus of ROxford and RParis * fix: add/remove subtasks from BLINKIT2IMultiChoice and BLINKIT2TMultiChoice * update blink metadata * add updated BLINK results --------- Co-authored-by: gowitheflow-1998 --- .../eng/BLINKIT2IMultiChoice.py | 8 ++--- .../eng/BLINKIT2TMultiChoice.py | 8 ++--- .../BLINKIT2IMultiChoice.json | 36 +++++++++---------- .../BLINKIT2TMultiChoice.json | 36 +++++++++---------- 4 files changed, 44 insertions(+), 44 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py index d600eaa4f..98b0a0120 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py @@ -11,7 +11,7 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): reference="https://arxiv.org/abs/2404.12390", dataset={ "path": "JamieSJS/blink-it2i-multi", - "revision": "780ade70cd769e586502a61dda903e525f945a45", + "revision": "b7b46b72d1ed1fa44d25e2b9c4726afab4a7ce53", "trust_remote_code": True, }, type="Any2AnyMultiChoice", @@ -35,13 +35,13 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 402}, + "n_samples": {"test": 534}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 804, - "num_queries": 402, + "num_documents": 1200, + "num_queries": 534, "average_relevant_docs_per_query": 1, } }, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py 
b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py index fe37216de..60f42b8b0 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py @@ -11,7 +11,7 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): reference="https://arxiv.org/abs/2404.12390", dataset={ "path": "JamieSJS/blink-it2t-multi", - "revision": "b6e18eba186cada040ddb72e8e3cb92edd7ca5e9", + "revision": "ae713b03ae68e343f16c3bcdbd1b1ee760975d55", }, type="Any2AnyMultiChoice", category="it2t", @@ -34,13 +34,13 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 1073}, + "n_samples": {"test": 923}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 26, - "num_queries": 1073, + "num_documents": 24, + "num_queries": 923, "average_relevant_docs_per_query": 1, } }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json index 648d3aa59..6b300b821 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json @@ -1,31 +1,31 @@ { - "dataset_revision": "780ade70cd769e586502a61dda903e525f945a45", - "evaluation_time": 56.62301731109619, + "dataset_revision": "b7b46b72d1ed1fa44d25e2b9c4726afab4a7ce53", + "evaluation_time": 30.517717123031616, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "accuracy": 0.70149, + "accuracy": 0.6161, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.70149, - "mrr_at_1": 0.7014925373134329, - "mrr_at_10": 0.8507462686567164, - "mrr_at_100": 
0.8507462686567164, - "mrr_at_1000": 0.8507462686567164, - "mrr_at_20": 0.8507462686567164, - "mrr_at_3": 0.8507462686567164, - "mrr_at_5": 0.8507462686567164, - "ndcg_at_1": 0.70149, - "ndcg_at_10": 0.88983, - "ndcg_at_100": 0.88983, - "ndcg_at_1000": 0.88983, - "ndcg_at_20": 0.88983, - "ndcg_at_3": 0.88983, - "ndcg_at_5": 0.88983 + "main_score": 0.6161, + "mrr_at_1": 0.6161048689138576, + "mrr_at_10": 0.7762172284644191, + "mrr_at_100": 0.7762172284644191, + "mrr_at_1000": 0.7762172284644191, + "mrr_at_20": 0.7762172284644191, + "mrr_at_3": 0.7762172284644191, + "mrr_at_5": 0.7762172284644191, + "ndcg_at_1": 0.6161, + "ndcg_at_10": 0.81993, + "ndcg_at_100": 0.81993, + "ndcg_at_1000": 0.81993, + "ndcg_at_20": 0.81993, + "ndcg_at_3": 0.81993, + "ndcg_at_5": 0.81993 } ] }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json index bda6cd2cb..55aca4a70 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json @@ -1,31 +1,31 @@ { - "dataset_revision": "b6e18eba186cada040ddb72e8e3cb92edd7ca5e9", - "evaluation_time": 43.71325731277466, + "dataset_revision": "ae713b03ae68e343f16c3bcdbd1b1ee760975d55", + "evaluation_time": 10.223464965820312, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "accuracy": 0.38397, + "accuracy": 0.36945, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.38397, - "mrr_at_1": 0.38397017707362535, - "mrr_at_10": 0.650512581547066, - "mrr_at_100": 0.650512581547066, - "mrr_at_1000": 0.650512581547066, - "mrr_at_20": 0.650512581547066, - "mrr_at_3": 0.6295433364398889, - "mrr_at_5": 0.650512581547066, - "ndcg_at_1": 0.38397, - "ndcg_at_10": 
0.73974, - "ndcg_at_100": 0.73974, - "ndcg_at_1000": 0.73974, - "ndcg_at_20": 0.73974, - "ndcg_at_3": 0.70361, - "ndcg_at_5": 0.73974 + "main_score": 0.36945, + "mrr_at_1": 0.3694474539544962, + "mrr_at_10": 0.6365113759479949, + "mrr_at_100": 0.6365113759479949, + "mrr_at_1000": 0.6365113759479949, + "mrr_at_20": 0.6365113759479949, + "mrr_at_3": 0.61213434452871, + "mrr_at_5": 0.6365113759479949, + "ndcg_at_1": 0.36945, + "ndcg_at_10": 0.72903, + "ndcg_at_100": 0.72903, + "ndcg_at_1000": 0.72903, + "ndcg_at_20": 0.72903, + "ndcg_at_3": 0.68704, + "ndcg_at_5": 0.72903 } ] },