diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py index d600eaa4f..98b0a0120 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py @@ -11,7 +11,7 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): reference="https://arxiv.org/abs/2404.12390", dataset={ "path": "JamieSJS/blink-it2i-multi", - "revision": "780ade70cd769e586502a61dda903e525f945a45", + "revision": "b7b46b72d1ed1fa44d25e2b9c4726afab4a7ce53", "trust_remote_code": True, }, type="Any2AnyMultiChoice", @@ -35,13 +35,13 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 402}, + "n_samples": {"test": 534}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 804, - "num_queries": 402, + "num_documents": 1200, + "num_queries": 534, "average_relevant_docs_per_query": 1, } }, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py index fe37216de..60f42b8b0 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py @@ -11,7 +11,7 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): reference="https://arxiv.org/abs/2404.12390", dataset={ "path": "JamieSJS/blink-it2t-multi", - "revision": "b6e18eba186cada040ddb72e8e3cb92edd7ca5e9", + "revision": "ae713b03ae68e343f16c3bcdbd1b1ee760975d55", }, type="Any2AnyMultiChoice", category="it2t", @@ -34,13 +34,13 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 1073}, + "n_samples": {"test": 923}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 26, - "num_queries": 1073, + "num_documents": 24, + "num_queries": 923, "average_relevant_docs_per_query": 1, } }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json index 648d3aa59..6b300b821 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2IMultiChoice.json @@ -1,31 +1,31 @@ { - "dataset_revision": "780ade70cd769e586502a61dda903e525f945a45", - "evaluation_time": 56.62301731109619, + "dataset_revision": "b7b46b72d1ed1fa44d25e2b9c4726afab4a7ce53", + "evaluation_time": 30.517717123031616, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "accuracy": 0.70149, + "accuracy": 0.6161, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.70149, - "mrr_at_1": 0.7014925373134329, - "mrr_at_10": 0.8507462686567164, - "mrr_at_100": 0.8507462686567164, - "mrr_at_1000": 0.8507462686567164, - "mrr_at_20": 0.8507462686567164, - "mrr_at_3": 0.8507462686567164, - "mrr_at_5": 0.8507462686567164, - "ndcg_at_1": 0.70149, - "ndcg_at_10": 0.88983, - "ndcg_at_100": 0.88983, - "ndcg_at_1000": 0.88983, - "ndcg_at_20": 0.88983, - "ndcg_at_3": 0.88983, - "ndcg_at_5": 0.88983 + "main_score": 0.6161, + "mrr_at_1": 0.6161048689138576, + "mrr_at_10": 0.7762172284644191, + "mrr_at_100": 0.7762172284644191, + "mrr_at_1000": 0.7762172284644191, + "mrr_at_20": 0.7762172284644191, + "mrr_at_3": 0.7762172284644191, + "mrr_at_5": 0.7762172284644191, + "ndcg_at_1": 0.6161, + "ndcg_at_10": 0.81993, + "ndcg_at_100": 0.81993, + "ndcg_at_1000": 0.81993, + "ndcg_at_20": 0.81993, + "ndcg_at_3": 0.81993, + "ndcg_at_5": 0.81993 } ] }, diff --git a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json index bda6cd2cb..55aca4a70 100644 --- a/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json +++ b/results-mieb/openai__clip-vit-base-patch32/3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268/BLINKIT2TMultiChoice.json @@ -1,31 +1,31 @@ { - "dataset_revision": "b6e18eba186cada040ddb72e8e3cb92edd7ca5e9", - "evaluation_time": 43.71325731277466, + "dataset_revision": "ae713b03ae68e343f16c3bcdbd1b1ee760975d55", + "evaluation_time": 10.223464965820312, "kg_co2_emissions": null, "mteb_version": "1.12.90", "scores": { "test": [ { - "accuracy": 0.38397, + "accuracy": 0.36945, "hf_subset": "default", "languages": [ "eng-Latn" ], - "main_score": 0.38397, - "mrr_at_1": 0.38397017707362535, - "mrr_at_10": 0.650512581547066, - "mrr_at_100": 0.650512581547066, - "mrr_at_1000": 0.650512581547066, - "mrr_at_20": 0.650512581547066, - "mrr_at_3": 0.6295433364398889, - "mrr_at_5": 0.650512581547066, - "ndcg_at_1": 0.38397, - "ndcg_at_10": 0.73974, - "ndcg_at_100": 0.73974, - "ndcg_at_1000": 0.73974, - "ndcg_at_20": 0.73974, - "ndcg_at_3": 0.70361, - "ndcg_at_5": 0.73974 + "main_score": 0.36945, + "mrr_at_1": 0.3694474539544962, + "mrr_at_10": 0.6365113759479949, + "mrr_at_100": 0.6365113759479949, + "mrr_at_1000": 0.6365113759479949, + "mrr_at_20": 0.6365113759479949, + "mrr_at_3": 0.61213434452871, + "mrr_at_5": 0.6365113759479949, + "ndcg_at_1": 0.36945, + "ndcg_at_10": 0.72903, + "ndcg_at_100": 0.72903, + "ndcg_at_1000": 0.72903, + "ndcg_at_20": 0.72903, + "ndcg_at_3": 0.68704, + "ndcg_at_5": 0.72903 } ] },