From 373db747d807c3f2597269ac9abf50291673764d Mon Sep 17 00:00:00 2001
From: Roman Solomatin
Date: Fri, 13 Dec 2024 15:42:01 +0500
Subject: [PATCH] fix: Eval langs not correctly passed to monolingual tasks (#1587)

* fix SouthAfricanLangClassification.py

* add check for langs

* lint
---
NOTE(review): line breaks and indentation in this patch were restored from a
whitespace-collapsed copy. Blank context lines and the indentation of the
context lines in the last tests/test_TaskMetadata.py hunk are assumed; confirm
against the repository before applying.
NOTE(review): the removed HinDialect mapping keyed "kfq" points at "kfg-Deva",
and the new list keeps "kfg-Deva". One of the two codes is presumably a typo;
confirm against the dataset's labels.

 .../multilingual/HinDialectClassification.py  | 46 +++++++++----------
 .../SouthAfricanLangClassification.py         | 26 +++++------
 tests/test_TaskMetadata.py                    | 13 ++++++
 3 files changed, 49 insertions(+), 36 deletions(-)

diff --git a/mteb/tasks/Classification/multilingual/HinDialectClassification.py b/mteb/tasks/Classification/multilingual/HinDialectClassification.py
index 6565d4b71..c9d6b3666 100644
--- a/mteb/tasks/Classification/multilingual/HinDialectClassification.py
+++ b/mteb/tasks/Classification/multilingual/HinDialectClassification.py
@@ -3,29 +3,29 @@
 from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
 from mteb.abstasks.TaskMetadata import TaskMetadata
 
-_LANGUAGES = {
-    "pan": ["pan-Guru"],
-    "bgc": ["bgc-Deva"],
-    "mag": ["mag-Deva"],
-    "bns": ["bns-Deva"],
-    "kfq": ["kfg-Deva"],
-    "noe": ["noe-Deva"],
-    "bhb": ["bhb-Deva"],
-    "bho": ["bho-Deva"],
-    "gbm": ["gbm-Deva"],
-    "mup": ["mup-Deva"],
-    "anp": ["anp-Deva"],
-    "hne": ["hne-Deva"],
-    "bra": ["bra-Deva"],
-    "raj": ["raj-Deva"],
-    "awa": ["awa-Deva"],
-    "guj": ["guj-Gujr"],
-    "ben": ["ben-Beng"],
-    "bhd": ["bhd-Deva"],
-    "kfy": ["kfy-Deva"],
-    "mar": ["mar-Deva"],
-    "bjj": ["bjj-Deva"],
-}
+_LANGUAGES = [
+    "pan-Guru",
+    "bgc-Deva",
+    "mag-Deva",
+    "bns-Deva",
+    "kfg-Deva",
+    "noe-Deva",
+    "bhb-Deva",
+    "bho-Deva",
+    "gbm-Deva",
+    "mup-Deva",
+    "anp-Deva",
+    "hne-Deva",
+    "bra-Deva",
+    "raj-Deva",
+    "awa-Deva",
+    "guj-Gujr",
+    "ben-Beng",
+    "bhd-Deva",
+    "kfy-Deva",
+    "mar-Deva",
+    "bjj-Deva",
+]
 
 
 class HinDialectClassification(AbsTaskClassification):
diff --git a/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py b/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py
index 4cef2c060..217d300ec 100644
--- a/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py
+++ b/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py
@@ -3,19 +3,19 @@
 from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
 from mteb.abstasks.TaskMetadata import TaskMetadata
 
-_LANGUAGES = {
-    "afr": ["afr-Latn"],
-    "eng": ["eng-Latn"],
-    "nbl": ["nbl-Latn"],
-    "nso": ["nso-Latn"],
-    "sot": ["sot-Latn"],
-    "ssw": ["ssw-Latn"],
-    "tsn": ["tsn-Latn"],
-    "tso": ["tso-Latn"],
-    "ven": ["ven-Latn"],
-    "xho": ["xho-Latn"],
-    "zul": ["zul-Latn"],
-}
+_LANGUAGES = [
+    "afr-Latn",
+    "eng-Latn",
+    "nbl-Latn",
+    "nso-Latn",
+    "sot-Latn",
+    "ssw-Latn",
+    "tsn-Latn",
+    "tso-Latn",
+    "ven-Latn",
+    "xho-Latn",
+    "zul-Latn",
+]
 
 
 class SouthAfricanLangClassification(AbsTaskClassification):
diff --git a/tests/test_TaskMetadata.py b/tests/test_TaskMetadata.py
index 91ef4aabe..2b606c2c1 100644
--- a/tests/test_TaskMetadata.py
+++ b/tests/test_TaskMetadata.py
@@ -4,6 +4,7 @@
 
 import pytest
 
+from mteb import AbsTask
 from mteb.abstasks.TaskMetadata import TaskMetadata
 from mteb.overview import get_tasks
 
@@ -1095,3 +1096,15 @@ def test_empy_descriptive_stat_in_new_datasets():
         assert (
             task.metadata.name not in exceptions
         ), f"Dataset {task.metadata.name} should have descriptive stats"
+
+
+@pytest.mark.parametrize("task", get_tasks())
+def test_eval_langs_correctly_specified(task: AbsTask):
+    if task.is_multilingual:
+        assert isinstance(
+            task.metadata.eval_langs, dict
+        ), f"{task.metadata.name} should have eval_langs as a dict"
+    else:
+        assert isinstance(
+            task.metadata.eval_langs, list
+        ), f"{task.metadata.name} should have eval_langs as a list"