Skip to content

Commit

Permalink
add the tests back
Browse files Browse the repository at this point in the history
  • Loading branch information
patil-suraj committed Feb 12, 2021
1 parent efa8eaa commit 0283478
Show file tree
Hide file tree
Showing 18 changed files with 1,044 additions and 0 deletions.
94 changes: 94 additions & 0 deletions examples/legacy/seq2seq/test_calculate_rouge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import defaultdict
from pathlib import Path

import pandas as pd

from rouge_cli import calculate_rouge_path
from utils import calculate_rouge


# Three candidate summaries; the tests in this file pass this list as the
# first positional argument to calculate_rouge.
PRED = [
'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of the final seconds on board Flight 9525. The Germanwings co-pilot says he had a "previous episode of severe depression" German airline confirms it knew of Andreas Lubitz\'s depression years before he took control.',
"The Palestinian Authority officially becomes the 123rd member of the International Criminal Court. The formal accession was marked with a ceremony at The Hague, in the Netherlands. The Palestinians signed the ICC's founding Rome Statute in January. Israel and the United States opposed the Palestinians' efforts to join the body.",
"Amnesty International releases its annual report on the death penalty. The report catalogs the use of state-sanctioned killing as a punitive measure across the globe. At least 607 people were executed around the world in 2014, compared to 778 in 2013. The U.S. remains one of the worst offenders for imposing capital punishment.",
]

# Three reference texts; the tests in this file pass this list as the second
# positional argument to calculate_rouge (presumably paired by position with
# the candidate list -- confirm against calculate_rouge).
TGT = [
'Marseille prosecutor says "so far no videos were used in the crash investigation" despite media reports . Journalists at Bild and Paris Match are "very confident" the video clip is real, an editor says . Andreas Lubitz had informed his Lufthansa training school of an episode of severe depression, airline says .',
"Membership gives the ICC jurisdiction over alleged crimes committed in Palestinian territories since last June . Israel and the United States opposed the move, which could open the door to war crimes investigations against Israelis .",
"Amnesty's annual death penalty report catalogs encouraging signs, but setbacks in numbers of those sentenced to death . Organization claims that governments around the world are using the threat of terrorism to advance executions . The number of executions worldwide has gone down by almost 22% compared with 2013, but death sentences up by 28% .",
]


def test_disaggregated_scores_are_determinstic():
    """Check that un-aggregated rouge2 scores do not depend on the other keys requested.

    calculate_rouge is called twice with ``bootstrap_aggregation=False``: once
    asking for rouge2 and rougeL, once asking for rouge2 only. The first result
    must be a ``defaultdict``, and the mean rouge2 f-measure of both results
    must be exactly equal.
    """

    def mean_rouge2_fmeasure(scores):
        # Tabulate the "rouge2" entry and average its `fmeasure` column.
        return pd.DataFrame(scores["rouge2"]).fmeasure.mean()

    two_key_scores = calculate_rouge(PRED, TGT, bootstrap_aggregation=False, rouge_keys=["rouge2", "rougeL"])
    assert isinstance(two_key_scores, defaultdict)

    rouge2_only_scores = calculate_rouge(PRED, TGT, bootstrap_aggregation=False, rouge_keys=["rouge2"])

    assert mean_rouge2_fmeasure(two_key_scores) == mean_rouge2_fmeasure(rouge2_only_scores)


def test_newline_cnn_improvement():
    """Check that rougeLsum is strictly higher with ``newline_sep=True`` than with ``False``."""
    metric = "rougeLsum"
    # Evaluated in the order True, then False.
    with_sep, without_sep = [
        calculate_rouge(PRED, TGT, newline_sep=flag, rouge_keys=[metric])[metric] for flag in (True, False)
    ]
    assert with_sep > without_sep


def test_newline_irrelevant_for_other_metrics():
    """Check that rouge1, rouge2 and rougeL results are equal for both ``newline_sep`` settings."""
    metrics = ["rouge1", "rouge2", "rougeL"]
    # Keyed by the newline_sep flag; computed for True first, then False.
    results = {
        flag: calculate_rouge(PRED, TGT, newline_sep=flag, rouge_keys=metrics) for flag in (True, False)
    }
    assert results[True] == results[False]


def test_single_sent_scores_dont_depend_on_newline_sep():
    """Check that, for the inputs below, calculate_rouge returns equal results
    with ``newline_sep=True`` and ``newline_sep=False`` (default rouge keys)."""
    candidates = [
        "Her older sister, Margot Frank, died in 1945, a month earlier than previously thought.",
        'Marseille prosecutor says "so far no videos were used in the crash investigation" despite media reports .',
    ]
    references = [
        "Margot Frank, died in 1945, a month earlier than previously thought.",
        'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of the final seconds on board Flight 9525.',
    ]

    scores_with_sep = calculate_rouge(candidates, references, newline_sep=True)
    scores_without_sep = calculate_rouge(candidates, references, newline_sep=False)
    assert scores_with_sep == scores_without_sep


def test_pegasus_newline():
    """Check that rougeLsum with ``newline_sep`` left at its default beats ``newline_sep=False``.

    The candidate text contains a literal ``<n>`` marker (presumably the
    Pegasus-style newline token that calculate_rouge handles -- confirm in
    utils.calculate_rouge).
    """
    metric = "rougeLsum"
    candidates = [
        """" "a person who has such a video needs to immediately give it to the investigators," prosecutor says .<n> "it is a very disturbing scene," editor-in-chief of bild online tells "erin burnett: outfront" """
    ]
    references = [
        """ Marseille prosecutor says "so far no videos were used in the crash investigation" despite media reports . Journalists at Bild and Paris Match are "very confident" the video clip is real, an editor says . Andreas Lubitz had informed his Lufthansa training school of an episode of severe depression, airline says ."""
    ]

    # Baseline first (separator handling disabled), then the default behaviour.
    score_without_sep = calculate_rouge(candidates, references, rouge_keys=[metric], newline_sep=False)[metric]
    score_default = calculate_rouge(candidates, references, rouge_keys=[metric])[metric]
    assert score_default > score_without_sep


def test_rouge_cli():
    """Check the return types of calculate_rouge_path.

    With default arguments the result must be a ``dict``; with
    ``bootstrap_aggregation=False`` it must be a ``defaultdict``.
    """
    # NOTE(review): this path is relative to the current working directory and
    # names examples/seq2seq rather than examples/legacy/seq2seq -- confirm the
    # fixture directory exists where the tests are run from.
    data_dir = Path("examples/seq2seq/test_data/wmt_en_ro")
    source_file = data_dir / "test.source"
    target_file = data_dir / "test.target"

    aggregated = calculate_rouge_path(source_file, target_file)
    assert isinstance(aggregated, dict)

    disaggregated = calculate_rouge_path(source_file, target_file, bootstrap_aggregation=False)
    assert isinstance(disaggregated, defaultdict)
33 changes: 33 additions & 0 deletions examples/legacy/seq2seq/test_data/fsmt/build-eval-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python

import io
import json
import subprocess


# Language pairs to export, each written as [source, target]; get_all_data
# unpacks every entry as `src, tgt` and joins them into a "src-tgt" pair id.
pairs = [
["en", "ru"],
["ru", "en"],
["en", "de"],
["de", "en"],
]

# Number of leading lines kept per side for each pair (used as a slice bound
# in get_all_data).
n_objs = 8


def get_all_data(pairs, n_objs):
    """Collect the first ``n_objs`` source and reference lines for each language pair.

    For every ``(src, tgt)`` entry, the ``sacrebleu`` command-line tool is run
    twice against the ``wmt19`` test set for the pair ``"{src}-{tgt}"``: once
    with ``--echo src`` and once with ``--echo ref``. Its standard output is
    decoded as UTF-8 and split into lines.

    Args:
        pairs: Iterable of two-item ``(src, tgt)`` language-code sequences.
        n_objs: Maximum number of leading lines to keep per side.

    Returns:
        A dict mapping ``"src-tgt"`` to ``{"src": [...], "tgt": [...]}``.
        Empty ``pairs`` yields an empty dict and runs no command.

    Raises:
        subprocess.CalledProcessError: If ``sacrebleu`` exits with a non-zero
            status. Without this check a failed run would silently produce
            empty line lists.
        FileNotFoundError: If the ``sacrebleu`` executable cannot be found
            (raised by ``subprocess.run``).
    """
    text = {}
    for src, tgt in pairs:
        pair = f"{src}-{tgt}"
        sides = {}
        # Output key -> value given to sacrebleu's --echo option.
        for key, echo in (("src", "src"), ("tgt", "ref")):
            # Build argv as a list so no argument is ever re-split on whitespace.
            cmd = ["sacrebleu", "-t", "wmt19", "-l", pair, "--echo", echo]
            completed = subprocess.run(cmd, stdout=subprocess.PIPE, check=True)
            sides[key] = completed.stdout.decode("utf-8").splitlines()[:n_objs]
        text[pair] = sides
    return text


# Fetch the evaluation snippets for every configured language pair and write
# them to a JSON file in the current working directory.
text = get_all_data(pairs, n_objs)
filename = "./fsmt_val_data.json"
with io.open(filename, "w", encoding="utf-8") as f:
    # json.dump writes to `f` and returns None, so its result is not bound.
    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    json.dump(text, f, indent=2, ensure_ascii=False)
90 changes: 90 additions & 0 deletions examples/legacy/seq2seq/test_data/fsmt/fsmt_val_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
{
"en-ru": {
"src": [
"Welsh AMs worried about 'looking like muppets'",
"There is consternation among some AMs at a suggestion their title should change to MWPs (Member of the Welsh Parliament).",
"It has arisen because of plans to change the name of the assembly to the Welsh Parliament.",
"AMs across the political spectrum are worried it could invite ridicule.",
"One Labour AM said his group was concerned \"it rhymes with Twp and Pwp.\"",
"For readers outside of Wales: In Welsh twp means daft and pwp means poo.",
"A Plaid AM said the group as a whole was \"not happy\" and has suggested alternatives.",
"A Welsh Conservative said his group was \"open minded\" about the name change, but noted it was a short verbal hop from MWP to Muppet."
],
"tgt": [
"Члены Национальной ассамблеи Уэльса обеспокоены, что \"выглядят как куклы\"",
"Некоторые члены Национальной ассамблеи Уэльса в ужасе от предложения о том, что их наименование должно измениться на MPW (члены Парламента Уэльса).",
"Этот вопрос был поднят в связи с планами по переименованию ассамблеи в Парламент Уэльса.",
"Члены Национальной ассамблеи Уэльса всего политического спектра обеспокоены, что это может породить насмешки.",
"Один из лейбористских членов Национальной ассамблеи Уэльса сказал, что его партия обеспокоена тем, что \"это рифмуется с Twp и Pwp\".",
"Для читателей за предлами Уэльса: по-валлийски twp означает \"глупый\", а pwp означает \"какашка\".",
"Член Национальной ассамблеи от Плайд сказал, что эта партия в целом \"не счастлива\" и предложил альтернативы.",
"Представитель Консервативной партии Уэльса сказал, что его партия \"открыта\" к переименованию, но отметил, что между WMP и Muppet небольшая разница в произношении."
]
},
"ru-en": {
"src": [
"Названо число готовящихся к отправке в Донбасс новобранцев из Украины",
"Официальный представитель Народной милиции самопровозглашенной Луганской Народной Республики (ЛНР) Андрей Марочко заявил, что зимой 2018-2019 года Украина направит в Донбасс не менее 3 тыс. новобранцев.",
"По его словам, таким образом Киев планирует \"хоть как-то доукомплектовать подразделения\".",
"\"Нежелание граждан Украины проходить службу в рядах ВС Украины, массовые увольнения привели к низкой укомплектованности подразделений\", - рассказал Марочко, которого цитирует \"РИА Новости\".",
"Он также не исключил, что реальные цифры призванных в армию украинцев могут быть увеличены в случае необходимости.",
"В 2014-2017 годах Киев начал так называемую антитеррористическую операцию (АТО), которую позже сменили на операцию объединенных сил (ООС).",
"Предполагалось, что эта мера приведет к усилению роли украинских силовиков в урегулировании ситуации.",
"В конце августа 2018 года ситуация в Донбассе обострилась из-за убийства главы ДНР Александра Захарченко."
],
"tgt": [
"The number of new Ukrainian recruits ready to go to Donbass has become public",
"Official representative of the peoples’ militia of the self-proclaimed Lugansk People’s Republic Andrey Marochko claimed that Ukrainian will send at least 3 thousand new recruits to Donbass in winter 2018-2019.",
"This is how Kyiv tries “at least somehow to staff the units,” he said.",
"“The unwillingness of Ukrainian citizens to serve in the Ukraine’s military forces, mass resignments lead to low understaffing,” said Marochko cited by RIA Novosti.",
"Also, he doesn’t exclude that the real numbers of conscripts in the Ukrainian army can be raised is necessary.",
"In 2014-2017, Kyiv started so-called antiterrorist operation, that ws later changed to the united forces operation.",
"This measure was supposed to strengthen the role of the Ukrainian military in settling the situation.",
"In the late August 2018, the situation in Donbass escalated as the DNR head Aleksandr Zakharchenko was killed."
]
},
"en-de": {
"src": [
"Welsh AMs worried about 'looking like muppets'",
"There is consternation among some AMs at a suggestion their title should change to MWPs (Member of the Welsh Parliament).",
"It has arisen because of plans to change the name of the assembly to the Welsh Parliament.",
"AMs across the political spectrum are worried it could invite ridicule.",
"One Labour AM said his group was concerned \"it rhymes with Twp and Pwp.\"",
"For readers outside of Wales: In Welsh twp means daft and pwp means poo.",
"A Plaid AM said the group as a whole was \"not happy\" and has suggested alternatives.",
"A Welsh Conservative said his group was \"open minded\" about the name change, but noted it was a short verbal hop from MWP to Muppet."
],
"tgt": [
"Walisische Ageordnete sorgen sich \"wie Dödel auszusehen\"",
"Es herrscht Bestürzung unter einigen Mitgliedern der Versammlung über einen Vorschlag, der ihren Titel zu MWPs (Mitglied der walisischen Parlament) ändern soll.",
"Der Grund dafür waren Pläne, den Namen der Nationalversammlung in Walisisches Parlament zu ändern.",
"Mitglieder aller Parteien der Nationalversammlung haben Bedenken, dass sie sich dadurch Spott aussetzen könnten.",
"Ein Labour-Abgeordneter sagte, dass seine Gruppe \"sich mit Twp und Pwp reimt\".",
"Hinweis für den Leser: „twp“ im Walisischen bedeutet „bescheuert“ und „pwp“ bedeutet „Kacke“.",
"Ein Versammlungsmitglied von Plaid Cymru sagte, die Gruppe als Ganzes sei \"nicht glücklich\" und hat Alternativen vorgeschlagen.",
"Ein walisischer Konservativer sagte, seine Gruppe wäre „offen“ für eine Namensänderung, wies aber darauf hin, dass es von „MWP“ (Mitglied des Walisischen Parlaments) nur ein kurzer verbaler Sprung zu „Muppet“ ist."
]
},
"de-en": {
"src": [
"Schöne Münchnerin 2018: Schöne Münchnerin 2018 in Hvar: Neun Dates",
"Von az, aktualisiert am 04.05.2018 um 11:11",
"Ja, sie will...",
"\"Schöne Münchnerin\" 2018 werden!",
"Am Nachmittag wartet erneut eine Überraschung auf unsere Kandidatinnen: sie werden das romantische Candlelight-Shooting vor der MY SOLARIS nicht alleine bestreiten, sondern an der Seite von Male-Model Fabian!",
"Hvar - Flirten, kokettieren, verführen - keine einfachen Aufgaben für unsere Mädchen.",
"Insbesondere dann, wenn in Deutschland ein Freund wartet.",
"Dennoch liefern die neun \"Schöne Münchnerin\"-Kandidatinnen beim Shooting mit People-Fotograf Tuan ab und trotzen Wind, Gischt und Regen wie echte Profis."
],
"tgt": [
"The Beauty of Munich 2018: the Beauty of Munich 2018 in Hvar: Nine dates",
"From A-Z, updated on 04/05/2018 at 11:11",
"Yes, she wants to...",
"to become \"The Beauty of Munich\" in 2018!",
"In the afternoon there is another surprise waiting for our contestants: they will be competing for the romantic candlelight photo shoot at MY SOLARIS not alone, but together with a male-model Fabian!",
"Hvar with its flirting, coquetting, and seduction is not an easy task for our girls.",
"Especially when there is a boyfriend waiting in Germany.",
"Despite dealing with wind, sprays and rain, the nine contestants of \"The Beauty of Munich\" behaved like real professionals at the photo shoot with People-photographer Tuan."
]
}
}
1 change: 1 addition & 0 deletions examples/legacy/seq2seq/test_data/test_data
Loading

0 comments on commit 0283478

Please sign in to comment.