Merge pull request #11 from Samoed/add_rusian_models

Add Russian model results
embeddings-benchmark · Aug 6, 2024 · 0da1545
2 parents d42244d + c2e06a9
commit 0da1545
Show file tree

Hide file tree

Showing 726 changed files with 165,637 additions and 13,586 deletions.
diff --git a/paths.json b/paths.json
diff --git a/results.py b/results.py
@@ -33,11 +33,13 @@
 # Use "validation" split instead
 VALIDATION_SPLIT = ["AFQMC", "Cmnli", "IFlyTek", "LEMBSummScreenFDRetrieval", "MSMARCO", "MSMARCO-PL", "MultilingualSentiment", "Ocnli", "TNews"]
 # Use "dev" split instead
-DEV_SPLIT = ["CmedqaRetrieval", "CovidRetrieval", "DuRetrieval", "EcomRetrieval", "MedicalRetrieval", "MMarcoReranking", "MMarcoRetrieval", "MSMARCO", "MSMARCO-PL", "T2Reranking", "T2Retrieval", "VideoRetrieval"]
+DEV_SPLIT = ["CmedqaRetrieval", "CovidRetrieval", "DuRetrieval", "EcomRetrieval", "MedicalRetrieval", "MMarcoReranking", "MMarcoRetrieval", "MSMARCO", "MSMARCO-PL", "T2Reranking", "T2Retrieval", "VideoRetrieval", "TERRa",]
 # Use "test.full" split
 TESTFULL_SPLIT = ["OpusparcusPC"]
 # Use "standard" split
 STANDARD_SPLIT = ["BrightRetrieval"]
+# Use "devtest" split
+DEVTEST_SPLIT = ["FloresBitextMining"]
 
 TEST_AVG_SPLIT = {
     "LEMBNeedleRetrieval": ["test_256", "test_512", "test_1024", "test_2048", "test_4096", "test_8192", "test_16384", "test_32768"],
@@ -235,6 +237,15 @@
     "voyage-multilingual-2",
     "xlm-roberta-base",
     "xlm-roberta-large",
+    "deberta-v1-base",
+    "USER-bge-m3",
+    "USER-base",
+    "rubert-tiny-turbo",
+    "LaBSE-ru-turbo",
+    "distilrubert-small-cased-conversational",
+    "rubert-base-cased",
+    "rubert-base-cased-sentence",
+    "LaBSE-en-ru",
 ]
 
 
@@ -324,6 +335,8 @@ def _generate_examples(self, filepath):
                     split = "test.full"
                 elif (ds_name in STANDARD_SPLIT) and ("standard" in res_dict):
                     split = "standard"
+                elif (ds_name in DEVTEST_SPLIT) and ("devtest" in res_dict):
+                    split = "devtest"
                 elif (ds_name in TEST_AVG_SPLIT):
                     # Average splits
                     res_dict["test_avg"] = {}
@@ -381,7 +394,7 @@ def _generate_examples(self, filepath):
                                     })
                             else:
                                 if not isinstance(score, float): 
-                                    print(f'WARNING: Expected float, got {score} for {ds_name} {lang} {metric} {k}')
+                                    print(f'WARNING: Expected float, got {score} for {ds_name} {lang} {metric}')
                                     continue
                                 out.append({
                                     "mteb_dataset_name": ds_name,
@@ -415,4 +428,4 @@ def _generate_examples(self, filepath):
 
 # NOTE: for generating the new paths
 if __name__ == "__main__":
-    get_paths()
+    get_paths()
diff --git a/results/LaBSE-en-ru/cf0714e606d4af551e14ad69a7929cd6b0da7f7e/BUCC.v2.json b/results/LaBSE-en-ru/cf0714e606d4af551e14ad69a7929cd6b0da7f7e/BUCC.v2.json
@@ -0,0 +1,23 @@
+{
+  "dataset_revision": "1739dc11ffe9b7bfccd7f3d585aeb4c544fc6677",
+  "evaluation_time": 17.75709819793701,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.12.85",
+  "scores": {
+    "test": [
+      {
+        "accuracy": 0.9801177693107032,
+        "f1": 0.9738298118000231,
+        "hf_subset": "ru-en",
+        "languages": [
+          "rus-Cyrl",
+          "eng-Latn"
+        ],
+        "main_score": 0.9738298118000231,
+        "precision": 0.9707250894815842,
+        "recall": 0.9801177693107032
+      }
+    ]
+  },
+  "task_name": "BUCC.v2"
+}