-
Notifications
You must be signed in to change notification settings - Fork 283
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: Adding Georgian Sentiment Classification (#534)
* Adding First Georgian Dataset * Points * Update __init__.py * Update __init__.py
- Loading branch information
Showing
5 changed files
with
93 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
{"GitHub": "isaac-chung", "Review PR": 2} | ||
{"GitHub": "asparius", "New dataset": 2} |
61 changes: 61 additions & 0 deletions
61
mteb/tasks/Classification/kat/GeorgianSentimentClassification.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
from __future__ import annotations | ||
|
||
from mteb.abstasks import AbsTaskClassification | ||
from mteb.abstasks.TaskMetadata import TaskMetadata | ||
|
||
|
||
class GeorgianSentimentClassification(AbsTaskClassification):
    """Sentiment classification task over Georgian text.

    The class carries no logic of its own; everything is declared in
    ``metadata``: the dataset path and pinned revision, the evaluated split
    (``test``), the language tag (``kat-Geor``) and the main score
    (``accuracy``).
    """

    metadata = TaskMetadata(
        name="GeorgianSentimentClassification",
        description="Georgian Sentiment Dataset",
        reference="https://aclanthology.org/2022.lrec-1.173",
        dataset={
            "path": "asparius/Georgian-Sentiment",
            # Pinned revision so the evaluated data cannot change silently.
            "revision": "d4fb68dff38e89c42406080737b8431ea48fa866",
        },
        type="Classification",
        category="s2s",
        eval_splits=["test"],
        eval_langs=["kat-Geor"],
        main_score="accuracy",
        date=("2022-01-01", "2022-06-25"),
        form=["written"],
        domains=["Reviews"],
        task_subtypes=["Sentiment/Hate speech"],
        license="CC BY 4.0",
        socioeconomic_status="mixed",
        annotations_creators="derived",
        dialect=[],
        text_creation="found",
        # Raw string on purpose: the entry contains LaTeX accent commands
        # (\'e, \`e). In a regular literal Python would consume the backslash
        # of \' and flag \` as an invalid escape sequence, corrupting the
        # BibTeX that is emitted at runtime.
        bibtex_citation=r"""
        @inproceedings{stefanovitch-etal-2022-resources,
            title = "Resources and Experiments on Sentiment Classification for {G}eorgian",
            author = "Stefanovitch, Nicolas and
            Piskorski, Jakub and
            Kharazi, Sopho",
            editor = "Calzolari, Nicoletta and
            B{\'e}chet, Fr{\'e}d{\'e}ric and
            Blache, Philippe and
            Choukri, Khalid and
            Cieri, Christopher and
            Declerck, Thierry and
            Goggi, Sara and
            Isahara, Hitoshi and
            Maegaard, Bente and
            Mariani, Joseph and
            Mazo, H{\'e}l{\`e}ne and
            Odijk, Jan and
            Piperidis, Stelios",
            booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
            month = jun,
            year = "2022",
            address = "Marseille, France",
            publisher = "European Language Resources Association",
            url = "https://aclanthology.org/2022.lrec-1.173",
            pages = "1613--1621",
            abstract = "This paper presents, to the best of our knowledge, the first ever publicly available annotated dataset for sentiment classification and semantic polarity dictionary for Georgian. The characteristics of these resources and the process of their creation are described in detail. The results of various experiments on the performance of both lexicon- and machine learning-based models for Georgian sentiment classification are also reported. Both 3-label (positive, neutral, negative) and 4-label settings (same labels + mixed) are considered. The machine learning models explored include, i.a., logistic regression, SVMs, and transformed-based models. We also explore transfer learning- and translation-based (to a well-supported language) approaches. The obtained results for Georgian are on par with the state-of-the-art results in sentiment classification for well studied languages when using training data of comparable size.",
        }
        """,
        # Sample counts and mean character lengths per split, as recorded
        # when the task was added.
        n_samples={"train": 330, "test": 1200},
        avg_character_length={"train": 114.26, "test": 118.06},
    )
Empty file.
15 changes: 15 additions & 0 deletions
15
results/intfloat__multilingual-e5-small/GeorgianSentimentClassification.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"dataset_revision": "d4fb68dff38e89c42406080737b8431ea48fa866", | ||
"mteb_dataset_name": "GeorgianSentimentClassification", | ||
"mteb_version": "1.6.8", | ||
"test": { | ||
"accuracy": 0.7464166666666666, | ||
"accuracy_stderr": 0.047843857727217436, | ||
"ap": 0.6872367759871585, | ||
"ap_stderr": 0.048726518159705225, | ||
"evaluation_time": 37.8, | ||
"f1": 0.7455938411656741, | ||
"f1_stderr": 0.0485250595381615, | ||
"main_score": 0.7464166666666666 | ||
} | ||
} |
15 changes: 15 additions & 0 deletions
15
...-transformers__paraphrase-multilingual-MiniLM-L12-v2/GeorgianSentimentClassification.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"dataset_revision": "d4fb68dff38e89c42406080737b8431ea48fa866", | ||
"mteb_dataset_name": "GeorgianSentimentClassification", | ||
"mteb_version": "1.6.8", | ||
"test": { | ||
"accuracy": 0.7571666666666667, | ||
"accuracy_stderr": 0.027223866163513386, | ||
"ap": 0.6982303339367802, | ||
"ap_stderr": 0.03866108274790974, | ||
"evaluation_time": 29.7, | ||
"f1": 0.7552573260226974, | ||
"f1_stderr": 0.027441367391553517, | ||
"main_score": 0.7571666666666667 | ||
} | ||
} |