From 5b6f20fe6fbe7673480fbb8c36402ddbe7e203a2 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sun, 1 Dec 2024 16:26:32 +0200 Subject: [PATCH] fix: Task load data error for SICK-BR-STS and XStance (#1534) * fix task load data for two tasks * correct dataset keys --- .../PairClassification/multilingual/XStance.py | 4 ++-- mteb/tasks/STS/por/SickBrSTS.py | 14 ++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/mteb/tasks/PairClassification/multilingual/XStance.py b/mteb/tasks/PairClassification/multilingual/XStance.py index 515e598940..03d4f066e7 100644 --- a/mteb/tasks/PairClassification/multilingual/XStance.py +++ b/mteb/tasks/PairClassification/multilingual/XStance.py @@ -100,8 +100,8 @@ def dataset_transform(self): for split in self.metadata.eval_splits: _dataset[lang][split] = [ { - "sent1": self.dataset[lang][split]["sent1"], - "sent2": self.dataset[lang][split]["sent2"], + "sentence1": self.dataset[lang][split]["sentence1"], + "sentence2": self.dataset[lang][split]["sentence2"], "labels": self.dataset[lang][split]["labels"], } ] diff --git a/mteb/tasks/STS/por/SickBrSTS.py b/mteb/tasks/STS/por/SickBrSTS.py index 7f42fadd80..5298ab5437 100644 --- a/mteb/tasks/STS/por/SickBrSTS.py +++ b/mteb/tasks/STS/por/SickBrSTS.py @@ -60,14 +60,12 @@ def metadata_dict(self) -> dict[str, str]: return metadata_dict def dataset_transform(self): - for split in self.dataset: - self.dataset.update( - { - split: self.dataset[split].train_test_split( - test_size=N_SAMPLES, seed=self.seed, label="entailment_label" - )["test"] - } - ) + self.dataset = self.stratified_subsampling( + self.dataset, + seed=42, + splits=self.metadata.eval_splits, + label="entailment_label", + ) self.dataset = self.dataset.rename_columns( {