Merge branch 'main' of https://github.com/embeddings-benchmark/mteb

embeddings-benchmark · Apr 24, 2024 · 10f3d4b · 10f3d4b
2 parents 36312ac + 32ba5b1
commit 10f3d4b
Show file tree

Hide file tree

Showing 67 changed files with 2,489 additions and 259 deletions.
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -0,0 +1,41 @@
+# GitHub Action that regenerates the task table in docs/tasks.md.
+#
+# On every push to main it runs docs/create_tasks_table.py and, if the table
+# changed, commits and pushes the updated docs/tasks.md.
+
+name: documentation
+
+on:
+  push:
+    branches: [main]
+
+jobs:
+  create-table:
+    runs-on: ubuntu-latest
+    steps:
+      # Check out with the RELEASE secret as the token; presumably so that the
+      # `git push` in the last step is authorised to write to main (confirm).
+      - uses: actions/checkout@v3
+        with:
+          token: ${{ secrets.RELEASE }}
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+          cache: "pip"
+
+      - name: Install dependencies
+        run: |
+          make install
+
+      - name: Create table
+        run: python docs/create_tasks_table.py
+
+      - name: Push table
+        run: |
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+          git config --global user.name "github-actions[bot]"
+          # Only docs/tasks.md is committed, so only look for changes in that
+          # file; otherwise an unrelated modified file would lead to an empty
+          # commit and fail the job.
+          if git diff --quiet -- docs/tasks.md; then
+            echo "No changes detected"
+          else
+            git add docs/tasks.md
+            git commit -m "Update tasks table"
+            git push
+          fi
diff --git a/README.md b/README.md
@@ -133,15 +133,18 @@ Models should implement the following interface, implementing an `encode` functi
 
 ```python
 class MyModel():
-    def encode(self, sentences: list[str], **kwargs) -> list[np.ndarray] | list[torch.Tensor]:
-        """
-        Returns a list of embeddings for the given sentences.
-        
+    def encode(
+        self, sentences: list[str], prompt: str, **kwargs: Any
+    ) -> torch.Tensor | np.ndarray:
+        """Encodes the given sentences using the encoder.
+
         Args:
-            sentences: List of sentences to encode
+            sentences: The sentences to encode.
+            prompt: The prompt to use. Useful for prompt-based models.
+            **kwargs: Additional arguments to pass to the encoder.
 
         Returns:
-            List of embeddings for the given sentences
+            The encoded sentences.
         """
         pass
 
@@ -208,13 +211,15 @@ evaluation.run(model)
 | 📈 [Leaderboard] | The interactive leaderboard of the benchmark |
 | 🤖 [Adding a model] | Information related to how to submit a model to the leaderboard |
 | 👩‍💻 [Adding a dataset] | How to add a new task/dataset to MTEB | 
+| 👩‍💻 [Adding a leaderboard tab] | How to add a new leaderboard tab to MTEB | 
 | 🤝  [Contributing] | How to contribute to MTEB and set it up for development |
 <!-- | 🌐 [MMTEB] | An open-source effort to extend MTEB to cover a broad set of languages |   -->
 
 [Tasks]: docs/tasks.md
 [Contributing]: CONTRIBUTING.md
 [Adding a model]: docs/adding_a_model.md
 [Adding a dataset]: docs/adding_a_dataset.md
+[Adding a leaderboard tab]: docs/adding_a_leaderboard_tab.md
 [Leaderboard]: https://huggingface.co/spaces/mteb/leaderboard
 [MMTEB]: docs/mmteb/readme.md
 

diff --git a/docs/adding_a_leaderboard_tab.md b/docs/adding_a_leaderboard_tab.md
@@ -0,0 +1,15 @@
+## Adding a new Leaderboard tab
+
+The MTEB Leaderboard is available [here](https://huggingface.co/spaces/mteb/leaderboard) and we love new leaderboard tabs. To add a new leaderboard tab:
+
+1. Open a PR in https://hf.co/datasets/mteb/results with:
+- All results added in existing model folders or new folders
+- An updated paths.json (see the snippet in results.py)
+- If adding any new models, their names added to results.py
+- If you have access to all models you are adding, you can also [add results via the metadata](https://github.com/embeddings-benchmark/mteb/blob/main/docs/adding_a_model.md) for all of them / some of them
+2. Open a PR at https://huggingface.co/spaces/mteb/leaderboard modifying app.py to add your tab:
+- Add any new models & their specs to the global lists
+- Add your tab, credits etc to where the other tabs are defined
+- If you're adding new results to existing models, remove those models from `EXTERNAL_MODEL_RESULTS.json` such that they can be reloaded with the new results and are not cached.
+- You may also have to uncomment `, download_mode='force_redownload', verification_mode="no_checks")` where the datasets are loaded to experiment locally without caching of results
+- Test that it runs & works locally as you desire with `python app.py`, **please add screenshots to the PR**
diff --git a/docs/create_tasks_table.py b/docs/create_tasks_table.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+import mteb
+
+
+def author_from_bibtex(bibtex: str | None) -> str:
+    """Build a " (Author, Year)" citation suffix from a BibTeX entry.
+
+    The first author is taken from the ``author = {...}`` field (the part
+    before ", " when the name is written "Last, First") and is followed by
+    "et al." when the field lists more than one author.
+
+    Args:
+        bibtex: The BibTeX entry, or None if the task has no citation.
+
+    Returns:
+        A string such as " (Smith et al., 2020)" with a leading space, or an
+        empty string if ``bibtex`` is None or lacks an author or year field.
+    """
+    if bibtex is None:
+        return ""
+    # Both "author = {Authors}" and "author={Authors}" are accepted.
+    author_match = re.search(r"author\s*=\s*{([^}]*)}", bibtex)
+    year_match = re.search(r"year\s*=\s*{([^}]*)}", bibtex)
+    if author_match is None or year_match is None:
+        return ""
+    # Collapse newlines and indentation first so that author lists wrapped
+    # over several lines are still separated by a plain " and ".
+    author_field = " ".join(author_match.group(1).split())
+    authors = [a.split(", ") for a in author_field.split(" and ")]
+    first_author = authors[0][0]
+    # "et al." depends on the number of authors, not on the number of name
+    # parts of the first author.
+    author_str_w_et_al = (
+        first_author + " et al." if len(authors) > 1 else first_author
+    )
+    return f" ({author_str_w_et_al}, {year_match.group(1)})"
+
+
+def task_to_markdown_row(task: mteb.AbsTask) -> str:
+    """Render the metadata of a single task as one Markdown table row.
+
+    The row has seven cells: name (linked to the reference when there is one
+    and followed by the citation suffix), languages, type, category, domains,
+    number of samples and average character length. Missing optional values
+    are rendered as empty cells.
+    """
+    meta = task.metadata
+
+    if meta.reference:
+        name_cell = f"[{meta.name}]({meta.reference})"
+    else:
+        name_cell = meta.name
+    domains_cell = "[" + ", ".join(meta.domains) + "]" if meta.domains else ""
+    samples_cell = meta.n_samples or ""
+    length_cell = meta.avg_character_length or ""
+
+    # Append " (Author, Year)" when the citation provides both.
+    name_cell += author_from_bibtex(meta.bibtex_citation)
+
+    return f"| {name_cell} | {meta.languages} | {meta.type} | {meta.category} | {domains_cell} | {samples_cell} | {length_cell} |"
+
+
+def create_tasks_table(tasks: list[mteb.AbsTask]) -> str:
+    """Return the Markdown table (header plus one row per task) as a string."""
+    header = """
+| Name | Languages | Type | Category | Domains | # Samples | Avg. Length (Char.) |
+|------|-----------|------|----------|---------|-----------|---------------------|
+"""
+    # Every row is terminated by a newline, including the last one.
+    rows = [task_to_markdown_row(task) + "\n" for task in tasks]
+    return header + "".join(rows)
+
+
+def insert_table(file_path, table):
+    """Replace the text between the table markers of a Markdown file.
+
+    Everything between ``<!-- TABLE START -->`` and the first
+    ``<!-- TABLE END -->`` after it is replaced by ``table``. The markers and
+    the rest of the file are left untouched.
+
+    Args:
+        file_path: Path of the Markdown file, which is rewritten in place.
+        table: Text to place between the two markers.
+
+    Raises:
+        ValueError: If the start marker, or an end marker after it, is missing.
+    """
+    start = "<!-- TABLE START -->"
+    end = "<!-- TABLE END -->"
+
+    with open(file_path, "r", encoding="utf-8") as file:
+        md = file.read()
+
+    start_idx = md.index(start) + len(start)
+    end_idx = md.index(end, start_idx)
+
+    # Splice by position instead of str.replace: replace() would also rewrite
+    # identical text outside the markers and, when the region is empty, insert
+    # the table between every character of the file.
+    md = md[:start_idx] + table + md[end_idx:]
+
+    with open(file_path, "w", encoding="utf-8") as file:
+        file.write(md)
+
+
+def main():
+    """Regenerate the task table in the tasks.md file next to this script."""
+    # Sort by task name so the generated table has a stable order.
+    sorted_tasks = sorted(mteb.get_tasks(), key=lambda task: task.metadata.name)
+    markdown_table = create_tasks_table(sorted_tasks)
+
+    target = Path(__file__).parent / "tasks.md"
+    insert_table(target, markdown_table)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/mmteb/points/439.jsonl b/docs/mmteb/points/439.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "rbroc", "New dataset": 2}
+{"GitHub": "KennethEnevoldsen", "Review PR": 2}
diff --git a/docs/mmteb/points/467.jsonl b/docs/mmteb/points/467.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "KennethEnevoldsen", "Coordination": 2}
+{"GitHub": "isaac-chung", "Review PR": 2}
diff --git a/docs/mmteb/points/478.jsonl b/docs/mmteb/points/478.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "dipam7", "New dataset": 2}
+{"GitHub": "isaac-chung", "Review PR": 2}
diff --git a/docs/mmteb/points/491.jsonl b/docs/mmteb/points/491.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "Akash190104", "New dataset": 2}
+{"GitHub": "isaac-chung", "Review PR": 2}
diff --git a/docs/mmteb/points/492.jsonl b/docs/mmteb/points/492.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "Akash190104", "New dataset": 2}
+{"GitHub": "isaac-chung", "Review PR": 2}
diff --git a/docs/mmteb/points/502.jsonl b/docs/mmteb/points/502.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "bp-high", "New dataset": 6}
+{"GitHub": "isaac-chung", "Review PR": 2}
diff --git a/docs/mmteb/points/524.jsonl b/docs/mmteb/points/524.jsonl
@@ -0,0 +1,3 @@
+{"GitHub": "jaygala24", "New dataset": 30}
+{"GitHub": "digantamisra98", "New dataset": 20}
+{"GitHub": "KennethEnevoldsen", "Review PR": 2}
diff --git a/docs/mmteb/points/525.jsonl b/docs/mmteb/points/525.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "isaac-chung", "Review PR": 2}
+{"GitHub": "KennethEnevoldsen", "Bug fixes": 3}
diff --git a/docs/mmteb/points/532.jsonl b/docs/mmteb/points/532.jsonl
@@ -0,0 +1,3 @@
+{"GitHub": "jaygala24", "New dataset": 36}
+{"GitHub": "digantamisra98", "New dataset": 18}
+{"GitHub": "asparius", "Review PR": 2}
diff --git a/docs/mmteb/points/533.jsonl b/docs/mmteb/points/533.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "isaac-chung", "Review PR": 2}
+{"GitHub": "asparius", "New dataset": 6}
diff --git a/docs/mmteb/points/536.jsonl b/docs/mmteb/points/536.jsonl
@@ -0,0 +1,3 @@
+{"GitHub": "Akash190104", "New dataset": 2}
+{"GitHub": "asparius", "Review PR": 2}
+{"GitHub": "isaac-chung", "Review PR": 2}
diff --git a/docs/mmteb/points/537.jsonl b/docs/mmteb/points/537.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "isaac-chung", "Review PR": 2}
+{"GitHub": "asparius", "New dataset": 6}
diff --git a/docs/mmteb/points_table.md b/docs/mmteb/points_table.md
@@ -4,46 +4,49 @@ _Note_: this table is **autogenerated** and should not be edited. It is intended
 
  | GitHub            |   New dataset |   Review PR |   Bug fixes |   Dataset annotations |   Coordination |   Running Models |   New task |   Total |
 |:------------------|--------------:|------------:|------------:|----------------------:|---------------:|-----------------:|-----------:|--------:|
-| KennethEnevoldsen |            60 |         102 |          30 |                     8 |              9 |                0 |          0 |     209 |
+| KennethEnevoldsen |            60 |         106 |          33 |                     8 |             11 |                0 |          0 |     218 |
 | imenelydiaker     |            90 |          54 |           5 |                     0 |              0 |                0 |          0 |     149 |
-| isaac-chung       |            66 |          54 |           6 |                     0 |              4 |                0 |          0 |     130 |
+| isaac-chung       |            66 |          72 |           6 |                     0 |              4 |                0 |          0 |     148 |
+| jaygala24         |           117 |           0 |           0 |                     0 |              0 |                0 |          0 |     117 |
 | wissam-sib        |            88 |           2 |           0 |                     0 |              0 |                0 |          0 |      90 |
+| MathieuCiancone   |            88 |           0 |           0 |                     0 |              0 |                0 |          0 |      88 |
 | schmarion         |            88 |           0 |           0 |                     0 |              0 |                0 |          0 |      88 |
 | GabrielSequeira   |            88 |           0 |           0 |                     0 |              0 |                0 |          0 |      88 |
-| MathieuCiancone   |            88 |           0 |           0 |                     0 |              0 |                0 |          0 |      88 |
+| digantamisra98    |            71 |           0 |           0 |                     0 |              0 |                0 |          0 |      71 |
 | Rysias            |            58 |           0 |           0 |                     0 |              0 |                0 |          0 |      58 |
-| jaygala24         |            51 |           0 |           0 |                     0 |              0 |                0 |          0 |      51 |
 | staoxiao          |            50 |           0 |           0 |                     0 |              0 |                0 |          0 |      50 |
 | x-tabdeveloping   |            48 |           0 |           0 |                     0 |              1 |                0 |          0 |      49 |
 | dokato            |            38 |           0 |           0 |                     0 |              0 |                0 |          0 |      38 |
-| digantamisra98    |            33 |           0 |           0 |                     0 |              0 |                0 |          0 |      33 |
+| asparius          |            32 |           4 |           0 |                     0 |              0 |                0 |          0 |      36 |
 | rafalposwiata     |            32 |           0 |           0 |                     0 |              0 |                0 |          0 |      32 |
 | orionw            |             0 |           0 |          20 |                     0 |              0 |                0 |         10 |      30 |
 | violenil          |            26 |           0 |           0 |                     0 |              0 |                0 |          0 |      26 |
 | Muennighoff       |             0 |          24 |           0 |                     0 |              0 |                0 |          0 |      24 |
-| asparius          |            20 |           0 |           0 |                     0 |              0 |                0 |          0 |      20 |
+| rbroc             |            20 |           0 |           0 |                     0 |              0 |                0 |          0 |      20 |
 | manandey          |            18 |           0 |           0 |                     0 |              0 |                0 |          0 |      18 |
-| rbroc             |            18 |           0 |           0 |                     0 |              0 |                0 |          0 |      18 |
 | MartinBernstorff  |             2 |           8 |           7 |                     0 |              0 |                0 |          0 |      17 |
 | taeminlee         |            16 |           0 |           0 |                     0 |              0 |                0 |          0 |      16 |
-| mmhamdy           |            14 |           0 |           0 |                     0 |              0 |                0 |          0 |      14 |
 | taidnguyen        |            14 |           0 |           0 |                     0 |              0 |                0 |          0 |      14 |
 | Sakshamrzt        |            10 |           4 |           0 |                     0 |              0 |                0 |          0 |      14 |
+| mmhamdy           |            14 |           0 |           0 |                     0 |              0 |                0 |          0 |      14 |
 | slvnwhrl          |            12 |           0 |           0 |                     0 |              0 |                0 |          0 |      12 |
-| guenthermi        |            12 |           0 |           0 |                     0 |              0 |                0 |          0 |      12 |
 | dwzhu-pku         |            12 |           0 |           0 |                     0 |              0 |                0 |          0 |      12 |
-| xu3kev            |            10 |           0 |           0 |                     0 |              0 |                0 |          0 |      10 |
+| guenthermi        |            12 |           0 |           0 |                     0 |              0 |                0 |          0 |      12 |
 | guangyusong       |            10 |           0 |           0 |                     0 |              0 |                0 |          0 |      10 |
+| xu3kev            |            10 |           0 |           0 |                     0 |              0 |                0 |          0 |      10 |
 | cassanof          |             6 |           0 |           1 |                     0 |              0 |                1 |          0 |       8 |
 | SaitejaUtpala     |             8 |           0 |           0 |                     0 |              0 |                0 |          0 |       8 |
+| Akash190104       |             6 |           0 |           0 |                     0 |              0 |                0 |          0 |       6 |
 | marcobellagente93 |             6 |           0 |           0 |                     0 |              0 |                0 |          0 |       6 |
 | izhx              |             6 |           0 |           0 |                     0 |              0 |                0 |          0 |       6 |
+| bp-high           |             6 |           0 |           0 |                     0 |              0 |                0 |          0 |       6 |
 | rasdani           |             4 |           0 |           0 |                     0 |              0 |                0 |          0 |       4 |
-| PhilipMay         |             0 |           2 |           0 |                     0 |              0 |                0 |          0 |       2 |
 | shreeya-dhakal    |             2 |           0 |           0 |                     0 |              0 |                0 |          0 |       2 |
-| hanhainebula      |             2 |           0 |           0 |                     0 |              0 |                0 |          0 |       2 |
+| dipam7            |             2 |           0 |           0 |                     0 |              0 |                0 |          0 |       2 |
 | davidstap         |             2 |           0 |           0 |                     0 |              0 |                0 |          0 |       2 |
-| tomaarsen         |             0 |           2 |           0 |                     0 |              0 |                0 |          0 |       2 |
 | achibb            |             2 |           0 |           0 |                     0 |              0 |                0 |          0 |       2 |
+| tomaarsen         |             0 |           2 |           0 |                     0 |              0 |                0 |          0 |       2 |
+| PhilipMay         |             0 |           2 |           0 |                     0 |              0 |                0 |          0 |       2 |
+| NouamaneTazi      |             0 |           2 |           0 |                     0 |              0 |                0 |          0 |       2 |
 | ManuelFay         |             2 |           0 |           0 |                     0 |              0 |                0 |          0 |       2 |
-| NouamaneTazi      |             0 |           2 |           0 |                     0 |              0 |                0 |          0 |       2 |
+| hanhainebula      |             2 |           0 |           0 |                     0 |              0 |                0 |          0 |       2 |
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		{"GitHub": "rbroc", "New dataset": 2}
		{"GitHub": "KennethEnevoldsen", "Review PR": 2}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		{"GitHub": "KennethEnevoldsen", "Coordination": 2}
		{"GitHub": "isaac-chung", "Review PR": 2}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		{"GitHub": "dipam7", "New dataset": 2}
		{"GitHub": "isaac-chung", "Review PR": 2}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		{"GitHub": "Akash190104", "New dataset": 2}
		{"GitHub": "isaac-chung", "Review PR": 2}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		{"GitHub": "bp-high", "New dataset": 6}
		{"GitHub": "isaac-chung", "Review PR": 2}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		{"GitHub": "isaac-chung", "Review PR": 2}
		{"GitHub": "KennethEnevoldsen", "Bug fixes": 3}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		{"GitHub": "isaac-chung", "Review PR": 2}
		{"GitHub": "asparius", "New dataset": 6}