Skip to content

Commit

Permalink
fix: Task load data error for SICK-BR-STS and XStance (#1534)
Browse files Browse the repository at this point in the history
* fix task load data for two tasks

* correct dataset keys
  • Loading branch information
isaac-chung authored Dec 1, 2024
1 parent e949d2a commit 5b6f20f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 10 deletions.
4 changes: 2 additions & 2 deletions mteb/tasks/PairClassification/multilingual/XStance.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ def dataset_transform(self):
  for split in self.metadata.eval_splits:
  _dataset[lang][split] = [
  {
- "sent1": self.dataset[lang][split]["sent1"],
- "sent2": self.dataset[lang][split]["sent2"],
+ "sentence1": self.dataset[lang][split]["sentence1"],
+ "sentence2": self.dataset[lang][split]["sentence2"],
  "labels": self.dataset[lang][split]["labels"],
  }
  ]
Expand Down
14 changes: 6 additions & 8 deletions mteb/tasks/STS/por/SickBrSTS.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,12 @@ def metadata_dict(self) -> dict[str, str]:
return metadata_dict

def dataset_transform(self):
- for split in self.dataset:
- self.dataset.update(
- {
- split: self.dataset[split].train_test_split(
- test_size=N_SAMPLES, seed=self.seed, label="entailment_label"
- )["test"]
- }
- )
+ self.dataset = self.stratified_subsampling(
+ self.dataset,
+ seed=42,
+ splits=self.metadata.eval_splits,
+ label="entailment_label",
+ )

self.dataset = self.dataset.rename_columns(
{
Expand Down

0 comments on commit 5b6f20f

Please sign in to comment.