Skip to content

Commit

Permalink
fix: Eval langs not correctly passed to monolingual tasks (#1587)
Browse files Browse the repository at this point in the history
* fix SouthAfricanLangClassification.py

* add check for langs

* lint
  • Loading branch information
Samoed authored Dec 13, 2024
1 parent 9c0b208 commit 373db74
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 36 deletions.
46 changes: 23 additions & 23 deletions mteb/tasks/Classification/multilingual/HinDialectClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,29 @@
from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
from mteb.abstasks.TaskMetadata import TaskMetadata

_LANGUAGES = {
"pan": ["pan-Guru"],
"bgc": ["bgc-Deva"],
"mag": ["mag-Deva"],
"bns": ["bns-Deva"],
"kfq": ["kfg-Deva"],
"noe": ["noe-Deva"],
"bhb": ["bhb-Deva"],
"bho": ["bho-Deva"],
"gbm": ["gbm-Deva"],
"mup": ["mup-Deva"],
"anp": ["anp-Deva"],
"hne": ["hne-Deva"],
"bra": ["bra-Deva"],
"raj": ["raj-Deva"],
"awa": ["awa-Deva"],
"guj": ["guj-Gujr"],
"ben": ["ben-Beng"],
"bhd": ["bhd-Deva"],
"kfy": ["kfy-Deva"],
"mar": ["mar-Deva"],
"bjj": ["bjj-Deva"],
}
_LANGUAGES = [
"pan-Guru",
"bgc-Deva",
"mag-Deva",
"bns-Deva",
"kfg-Deva",
"noe-Deva",
"bhb-Deva",
"bho-Deva",
"gbm-Deva",
"mup-Deva",
"anp-Deva",
"hne-Deva",
"bra-Deva",
"raj-Deva",
"awa-Deva",
"guj-Gujr",
"ben-Beng",
"bhd-Deva",
"kfy-Deva",
"mar-Deva",
"bjj-Deva",
]


class HinDialectClassification(AbsTaskClassification):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@
from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
from mteb.abstasks.TaskMetadata import TaskMetadata

_LANGUAGES = {
"afr": ["afr-Latn"],
"eng": ["eng-Latn"],
"nbl": ["nbl-Latn"],
"nso": ["nso-Latn"],
"sot": ["sot-Latn"],
"ssw": ["ssw-Latn"],
"tsn": ["tsn-Latn"],
"tso": ["tso-Latn"],
"ven": ["ven-Latn"],
"xho": ["xho-Latn"],
"zul": ["zul-Latn"],
}
_LANGUAGES = [
"afr-Latn",
"eng-Latn",
"nbl-Latn",
"nso-Latn",
"sot-Latn",
"ssw-Latn",
"tsn-Latn",
"tso-Latn",
"ven-Latn",
"xho-Latn",
"zul-Latn",
]


class SouthAfricanLangClassification(AbsTaskClassification):
Expand Down
13 changes: 13 additions & 0 deletions tests/test_TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pytest

from mteb import AbsTask
from mteb.abstasks.TaskMetadata import TaskMetadata
from mteb.overview import get_tasks

Expand Down Expand Up @@ -1095,3 +1096,15 @@ def test_empy_descriptive_stat_in_new_datasets():
assert (
task.metadata.name not in exceptions
), f"Dataset {task.metadata.name} should have descriptive stats"


@pytest.mark.parametrize("task", get_tasks())
def test_eval_langs_correctly_specified(task: AbsTask):
    """Check that ``eval_langs`` uses the container type matching the task kind.

    A task reporting ``is_multilingual`` must declare ``metadata.eval_langs``
    as a dict; every other task must declare it as a list.

    Args:
        task: One task instance yielded by ``get_tasks()``.
    """
    # Pick the required container type once so both cases share one assertion
    # and one message template ("... as a dict" / "... as a list").
    expected_type = dict if task.is_multilingual else list
    assert isinstance(
        task.metadata.eval_langs, expected_type
    ), f"{task.metadata.name} should have eval_langs as a {expected_type.__name__}"

0 comments on commit 373db74

Please sign in to comment.