
semeval-2016-2017-task3-subtaskBC

Released by @menshikh-iv on 05 Feb 18:42

The SemEval 2016 / 2017 Task 3 Subtask B and C datasets consist of English train+development data (317 original questions, 3,169 related questions, and 31,690 comments) and test data. The tasks and the collected data are described in sections 3 and 4.1 of the 2016 task paper linked in the “Papers” section of #18.

Related issue #18

attribute          value
File size          6 MB
Number of records  4 (upper level)
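
A minimal sketch of how the four upper-level records can be inspected, assuming the split names and field names used by the example below (the fourth record is presumably the 2016 training split):

import gensim.downloader as api

datasets = api.load("semeval-2016-2017-task3-subtaskBC")

# Splits used by the evaluation example further down; the fourth upper-level
# record is presumably the 2016 training split.
for split in ("2016-dev", "2016-test", "2017-test"):
    orgquestions = list(datasets[split])
    print(split, len(orgquestions), "original questions")
    # Each record is one original question together with its related-question
    # threads and their comments.
    orgquestion = orgquestions[0]
    print(sorted(orgquestion))    # ..., "OrgQBody", "OrgQSubject", "Threads"
    thread = orgquestion["Threads"][0]
    print(sorted(thread))         # ..., "RelComments", "RelQuestion"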

Read more: the task description papers are linked in the “Papers” section of #18.

Produced by: https://github.com/Witiko/semeval-2016_2017-task3-subtaskB-english

Example:

import gensim.downloader as api
from gensim.corpora import Dictionary
from gensim.similarities import MatrixSimilarity
from gensim.utils import simple_preprocess
import numpy as np


def read_corpus():
    # Stream the unannotated Subtask A threads: question subjects, question
    # bodies, and comment texts, tokenized with simple_preprocess.
    for thread in api.load("semeval-2016-2017-task3-subtaskA-unannotated"):
        yield simple_preprocess(thread["RelQuestion"]["RelQSubject"])
        yield simple_preprocess(thread["RelQuestion"]["RelQBody"])
        for relcomment in thread["RelComments"]:
            yield simple_preprocess(relcomment["RelCText"])


# Bag-of-words vocabulary built from the unannotated Subtask A corpus.
dictionary = Dictionary(read_corpus())
# Annotated Subtask B and C splits that are evaluated below.
datasets = api.load("semeval-2016-2017-task3-subtaskBC")


def produce_test_data(dataset):
    for orgquestion in datasets[dataset]:
        # Subtask B candidates: related questions, relevant if annotated as
        # "PerfectMatch" or "Relevant" with respect to the original question.
        relquestions = [
            (
                dictionary.doc2bow(
                    simple_preprocess(thread["RelQuestion"]["RelQSubject"])
                    + simple_preprocess(thread["RelQuestion"]["RelQBody"])
                ),
                thread["RelQuestion"]["RELQ_RELEVANCE2ORGQ"] in ("PerfectMatch", "Relevant"),
            )
            for thread in orgquestion["Threads"]
        ]

        # Subtask C candidates: comments, relevant if annotated as "Good"
        # with respect to the original question.
        relcomments = [
            (
                dictionary.doc2bow(simple_preprocess(relcomment["RelCText"])),
                relcomment["RELC_RELEVANCE2ORGQ"] == "Good",
            )
            for thread in orgquestion["Threads"]
            for relcomment in thread["RelComments"]
        ]

        orgquestion = dictionary.doc2bow(
            simple_preprocess(orgquestion["OrgQSubject"])
            + simple_preprocess(orgquestion["OrgQBody"])
        )
        yield orgquestion, dict(subtaskB=relquestions, subtaskC=relcomments)


def average_precision(similarities, relevance):
    # Rank the candidates by decreasing similarity, take the precision at the
    # rank of every relevant candidate, and average those precisions.
    precision = [
        (num_correct + 1) / (num_total + 1)
        for num_correct, num_total in enumerate(
            num_total
            for num_total, (_, relevant) in enumerate(
                sorted(zip(similarities, relevance), reverse=True)
            )
            if relevant
        )
    ]

    return np.mean(precision) if precision else 0.0


def evaluate(dataset, subtask):
    # Mean average precision (MAP) of the cosine similarities between the
    # original question and the candidates, expressed as a percentage.
    results = []
    for orgquestion, subtasks in produce_test_data(dataset):
        documents, relevance = zip(*subtasks[subtask])
        index = MatrixSimilarity(documents, num_features=len(dictionary))
        similarities = index[orgquestion]
        results.append(average_precision(similarities, relevance))

    return np.mean(results) * 100.0


for dataset in ("2016-dev", "2016-test", "2017-test"):
    print(
        "MAP score on the {} dataset:\t{:.2f} (Subtask B)\t{:.2f} (Subtask C)".format(
            dataset, evaluate(dataset, "subtaskB"), evaluate(dataset, "subtaskC")
        )
    )



"""
Output:

MAP score on the 2016-dev dataset:	41.89 (Subtask B)	3.33 (Subtask C)
MAP score on the 2016-test dataset:	51.42 (Subtask B)	5.59 (Subtask C)
MAP score on the 2017-test dataset:	23.65 (Subtask B)	0.74 (Subtask C)
"""