Skip to content

Commit

Permalink
[mieb] Update subtasks of BLINKIT2TMultiChoice and BLINKIT2IMultiChoi…
Browse files Browse the repository at this point in the history
…ce (#1403)

* wip: start adding BLIP models

* add other blip variants

* wip: add blip2_models.py

* make lint

* wip: implement blip2 wrapper

* feat: add blip2 models, still mismatched names

* fix: remove projections from image and text embeddings

* make lint

* wip: add coco BLIP2

* fix: BLIP2 better zero-shot classification without text_proj and vision_proj

* tidy blip2

* add imagenet-dog-15 dataset

* tidy and lint

* remove unused import

* add cluster_accuracy, ari and nmi to Image.ClusteringEvaluator

* add imagenet-10 clustering task

* add SOPI2IRetrieval

* add results for clip on ImageNet10Clustering and ImageNetDog15Clustering

* add SOPI2IRetrieval results for clip 32

* add results for clip vit 32/SOPI2IRetrieval

* resolve conflict

* add RP2kI2IRetrieval dataset

* add RP2kI2IRetrieval results with clip-vit-base-patch32

* update image retrieval __init__.py

* fix ImageTextPair dataloading for large datasets; more compositionality evaluation datasets

* add RP2kI2IRetrieval and METI2IRetrieval

* add METI2IRetrieval

* add SOP results

* make lint

* new revision for METI2IRetrieval

* make lint

* reset corpus chunk size

* remove wrong classification import

* add Flickr30k T2I and I2T

* add Flickr30k T2I retrieval

* reduced-size MET revision

* fix: add Flickr30k T2I

* make lint

* add two landmark datasets and results

* add Sketchy i2i retrieval

* add task metadata

* add BLINKIT2IRetrieval dataset

* add BLINKIT2TRetrieval

* add ImageCoDeT2IRetrieval

* make lint

* add vizwiz retrieval and results

* fix vizwiz duplicate texts

* add new vizwiz results

* add VQA2 results

* add GLD v2 I2T retrieval

* add gld v2 i2i retrieval

* make lint

* add AbsTaskAny2AnyMultiChoice

* make lint

* remove GLDv2I2IRetrieval

* exclude AbsTaskAny2AnyMultiChoice from test_load_data

* fix e5v&vista

* remove duplicate corpus entries from BLINKIT2TRetrieval dataset

* task type fix for running tasks

* update BLINKIT2T metadata

* fix wrong meta

* run mieb script

* split ROxford, RParis into easy, medium and hard

* make lint

* add BLINK as multi choice tasks

* fix: license metadata in wrong format

* remove null examples from corpus of ROxford and RParis

* fix: add/remove subtasks from BLINKIT2IMultiChoice and BLINKIT2TMultiChoice

* update blink metadata

* add updated BLINK results

---------

Co-authored-by: gowitheflow-1998 <[email protected]>
  • Loading branch information
Jamie-Stirling and gowitheflow-1998 authored Nov 7, 2024
1 parent 01b7f28 commit cdb92c6
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice):
reference="https://arxiv.org/abs/2404.12390",
dataset={
"path": "JamieSJS/blink-it2i-multi",
"revision": "780ade70cd769e586502a61dda903e525f945a45",
"revision": "b7b46b72d1ed1fa44d25e2b9c4726afab4a7ce53",
"trust_remote_code": True,
},
type="Any2AnyMultiChoice",
Expand All @@ -35,13 +35,13 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice):
}
""",
descriptive_stats={
"n_samples": {"test": 402},
"n_samples": {"test": 534},
"avg_character_length": {
"test": {
"average_document_length": 0.0,
"average_query_length": 0.0,
"num_documents": 804,
"num_queries": 402,
"num_documents": 1200,
"num_queries": 534,
"average_relevant_docs_per_query": 1,
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice):
reference="https://arxiv.org/abs/2404.12390",
dataset={
"path": "JamieSJS/blink-it2t-multi",
"revision": "b6e18eba186cada040ddb72e8e3cb92edd7ca5e9",
"revision": "ae713b03ae68e343f16c3bcdbd1b1ee760975d55",
},
type="Any2AnyMultiChoice",
category="it2t",
Expand All @@ -34,13 +34,13 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice):
}
""",
descriptive_stats={
"n_samples": {"test": 1073},
"n_samples": {"test": 923},
"avg_character_length": {
"test": {
"average_document_length": 0.0,
"average_query_length": 0.0,
"num_documents": 26,
"num_queries": 1073,
"num_documents": 24,
"num_queries": 923,
"average_relevant_docs_per_query": 1,
}
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
{
"dataset_revision": "780ade70cd769e586502a61dda903e525f945a45",
"evaluation_time": 56.62301731109619,
"dataset_revision": "b7b46b72d1ed1fa44d25e2b9c4726afab4a7ce53",
"evaluation_time": 30.517717123031616,
"kg_co2_emissions": null,
"mteb_version": "1.12.90",
"scores": {
"test": [
{
"accuracy": 0.70149,
"accuracy": 0.6161,
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.70149,
"mrr_at_1": 0.7014925373134329,
"mrr_at_10": 0.8507462686567164,
"mrr_at_100": 0.8507462686567164,
"mrr_at_1000": 0.8507462686567164,
"mrr_at_20": 0.8507462686567164,
"mrr_at_3": 0.8507462686567164,
"mrr_at_5": 0.8507462686567164,
"ndcg_at_1": 0.70149,
"ndcg_at_10": 0.88983,
"ndcg_at_100": 0.88983,
"ndcg_at_1000": 0.88983,
"ndcg_at_20": 0.88983,
"ndcg_at_3": 0.88983,
"ndcg_at_5": 0.88983
"main_score": 0.6161,
"mrr_at_1": 0.6161048689138576,
"mrr_at_10": 0.7762172284644191,
"mrr_at_100": 0.7762172284644191,
"mrr_at_1000": 0.7762172284644191,
"mrr_at_20": 0.7762172284644191,
"mrr_at_3": 0.7762172284644191,
"mrr_at_5": 0.7762172284644191,
"ndcg_at_1": 0.6161,
"ndcg_at_10": 0.81993,
"ndcg_at_100": 0.81993,
"ndcg_at_1000": 0.81993,
"ndcg_at_20": 0.81993,
"ndcg_at_3": 0.81993,
"ndcg_at_5": 0.81993
}
]
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
{
"dataset_revision": "b6e18eba186cada040ddb72e8e3cb92edd7ca5e9",
"evaluation_time": 43.71325731277466,
"dataset_revision": "ae713b03ae68e343f16c3bcdbd1b1ee760975d55",
"evaluation_time": 10.223464965820312,
"kg_co2_emissions": null,
"mteb_version": "1.12.90",
"scores": {
"test": [
{
"accuracy": 0.38397,
"accuracy": 0.36945,
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.38397,
"mrr_at_1": 0.38397017707362535,
"mrr_at_10": 0.650512581547066,
"mrr_at_100": 0.650512581547066,
"mrr_at_1000": 0.650512581547066,
"mrr_at_20": 0.650512581547066,
"mrr_at_3": 0.6295433364398889,
"mrr_at_5": 0.650512581547066,
"ndcg_at_1": 0.38397,
"ndcg_at_10": 0.73974,
"ndcg_at_100": 0.73974,
"ndcg_at_1000": 0.73974,
"ndcg_at_20": 0.73974,
"ndcg_at_3": 0.70361,
"ndcg_at_5": 0.73974
"main_score": 0.36945,
"mrr_at_1": 0.3694474539544962,
"mrr_at_10": 0.6365113759479949,
"mrr_at_100": 0.6365113759479949,
"mrr_at_1000": 0.6365113759479949,
"mrr_at_20": 0.6365113759479949,
"mrr_at_3": 0.61213434452871,
"mrr_at_5": 0.6365113759479949,
"ndcg_at_1": 0.36945,
"ndcg_at_10": 0.72903,
"ndcg_at_100": 0.72903,
"ndcg_at_1000": 0.72903,
"ndcg_at_20": 0.72903,
"ndcg_at_3": 0.68704,
"ndcg_at_5": 0.72903
}
]
},
Expand Down

0 comments on commit cdb92c6

Please sign in to comment.