
Working on generating Relevant Docs files with TF-IDF
pedrojlazevedo committed Mar 10, 2020
1 parent 97956ca commit e954e1d
Showing 3 changed files with 28 additions and 24 deletions.
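
The commit message refers to generating relevant-document files with TF-IDF, but the retrieval code itself is not part of this diff. Below is a minimal sketch of TF-IDF document ranking with scikit-learn; the function name rank_documents and the toy pages are assumptions for illustration, not this repository's API.

# Minimal TF-IDF ranking sketch (hypothetical helper, not code from this repository).
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def rank_documents(claim, documents, k=5):
    """Return indices of the k documents most similar to the claim under TF-IDF."""
    vectorizer = TfidfVectorizer(stop_words="english")
    doc_matrix = vectorizer.fit_transform(documents)   # one row per document
    claim_vector = vectorizer.transform([claim])       # reuse the document vocabulary
    scores = cosine_similarity(claim_vector, doc_matrix)[0]
    return scores.argsort()[::-1][:k]

# Toy usage with two hypothetical pages.
pages = ["Lisbon is the capital of Portugal.", "The Nile is a river in Africa."]
print(rank_documents("Lisbon is the capital of Portugal", pages, k=1))
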
18 changes: 8 additions & 10 deletions metrics.py
@@ -46,7 +46,7 @@
id : id of the claim
verifiable : boolean of 1 and 0 with respective meaning
docs : set of documents that verify the claim
docs_sep : set of documents seperated
docs_sep : set of documents separated
evidences: list of tuples of <doc, line>
difficulties: list of the number of sentences needed to be evidence
'''
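
For reference, a gold_dict with the fields listed in this docstring might look as follows; the claim id and page name are hypothetical and only show the shape of each field.

# Hypothetical example of the gold_dict structure described in the docstring above.
gold_dict = {
    'id': 137334,                      # claim id (hypothetical)
    'verifiable': 1,                   # 1 = verifiable, 0 = not enough info
    'docs': {'Lisbon'},                # gold documents (multi-doc evidence is joined with spaces)
    'docs_sep': {'Lisbon'},            # the same documents kept separate
    'evidences': {('Lisbon', '0')},    # (document, line) pairs
    'difficulties': [1],               # number of sentences needed per evidence set
}
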
@@ -64,7 +64,7 @@

# get gold inputs
gold_documents = set()
gold_documents_seperated = set()
gold_documents_separated = set()
sentences_pair = set()
evidences = claim['evidence']
difficulties = []
@@ -76,20 +76,20 @@
doc_name += str(e[2])
doc_name += " "
sentences_pair.add((str(e[2]), str(e[3]))) # add gold sentences
gold_documents_seperated.add(str(e[2])) # add the document
gold_documents_separated.add(str(e[2])) # add the document
difficulty += 1
doc_name = doc_name[:-1] # erase the last blank space
else:
doc_name = str(evidence[0][2])
gold_documents_seperated.add(str(evidence[0][2]))
gold_documents_separated.add(str(evidence[0][2]))
sentences_pair.add((str(evidence[0][2]), str(evidence[0][3])))
difficulty = 1
difficulties.append(difficulty)
gold_documents.add(doc_name)
gold_dict['difficulties'] = difficulties
gold_dict['docs'] = gold_documents
gold_dict['evidences'] = sentences_pair
gold_dict['docs_sep'] = gold_documents_seperated
gold_dict['docs_sep'] = gold_documents_separated

gold_data.append(gold_dict)

@@ -194,9 +194,6 @@
if stop == -1:
break

# scores from fever
results = fever_score(train_prediction, actual = train_set)

precision_correct /= total_claim
precision_incorrect /= total_claim
recall_correct /= total_claim
@@ -214,7 +211,6 @@
print("Fall-out: " + str(specificity))
print("Percentage of at least one document found correctly: " + str(doc_found)) # recall


precision_sent_correct /= total_claim
precision_sent_incorrect /= total_claim
recall_sent_correct /= total_claim
@@ -234,6 +230,9 @@
print("Percentage of at least one Sentence found correctly: " + str(sent_found_if_doc_found)) # recall
print("Percentage of at least one Sentence found correctly: " + str(another_sent)) # recall

# scores from fever
results = fever_score(train_prediction, actual=train_set)

print("\n#########")
print("# FEVER #")
print("#########")
@@ -242,4 +241,3 @@
print("Precision: \t\t\t" + str(results[2]))
print("Recall: \t\t\t" + str(results[3]))
print("F1-Score: \t\t\t" + str(results[4]))

5 changes: 5 additions & 0 deletions predict_all.sh
@@ -0,0 +1,5 @@
#!/bin/bash
mkdir -p data
mkdir -p data/fever-data
wget -O data/fever-data/train.jsonl https://s3-eu-west-1.amazonaws.com/fever.public/train.jsonl
wget -O data/fever-data/dev.jsonl https://s3-eu-west-1.amazonaws.com/fever.public/shared_task_dev.jsonl
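
predict_all.sh only downloads the FEVER training and shared-task development splits. A minimal way to read the resulting JSON-lines files (paths taken from the script above; the claim and label fields follow the published FEVER format) might be:

import json

def load_jsonl(path):
    # One JSON object per line, as in the FEVER .jsonl files.
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]

train_set = load_jsonl("data/fever-data/train.jsonl")
dev_set = load_jsonl("data/fever-data/dev.jsonl")
print(len(train_set), train_set[0]["claim"], train_set[0]["label"])
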
29 changes: 15 additions & 14 deletions scorer.py
@@ -1,5 +1,6 @@
import six


def check_predicted_evidence_format(instance):
if 'predicted_evidence' in instance.keys() and len(instance['predicted_evidence']):
assert all(isinstance(prediction, list)
@@ -11,7 +12,7 @@ def check_predicted_evidence_format(instance):
"Predicted evidence must be a list of (page,line) lists"

assert all(isinstance(prediction[0], six.string_types)
for prediction in instance["predicted_evidence"]), \
for prediction in instance["predicted_evidence"]), \
"Predicted evidence must be a list of (page<string>,line<int>) lists"

assert all(isinstance(prediction[1], int)
@@ -24,7 +25,7 @@ def is_correct_label(instance):


def is_strictly_correct(instance, max_evidence=None):
#Strict evidence matching is only for NEI class
# Strict evidence matching is only for NEI class
check_predicted_evidence_format(instance)

if instance["label"].upper() != "NOT ENOUGH INFO" and is_correct_label(instance):
@@ -33,15 +34,14 @@ def is_strictly_correct(instance, max_evidence=None):
if max_evidence is None:
max_evidence = len(instance["predicted_evidence"])


for evidence_group in instance["evidence"]:
#Filter out the annotation ids. We just want the evidence page and line number
# Filter out the annotation ids. We just want the evidence page and line number
actual_sentences = [[e[2], e[3]] for e in evidence_group]
#Only return true if an entire group of actual sentences is in the predicted sentences
# Only return true if an entire group of actual sentences is in the predicted sentences
if all([actual_sent in instance["predicted_evidence"][:max_evidence] for actual_sent in actual_sentences]):
return True

#If the class is NEI, we don't score the evidence retrieval component
# If the class is NEI, we don't score the evidence retrieval component
elif instance["label"].upper() == "NOT ENOUGH INFO" and is_correct_label(instance):
return True
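
is_strictly_correct requires that an entire gold evidence group be contained in the top max_evidence predictions, not just any single gold sentence. A hypothetical instance illustrates the difference; it assumes is_correct_label compares label and predicted_label as in the official FEVER scorer, whose body is outside this diff.

# Hypothetical instance: the gold group needs two sentences, the prediction retrieved only one.
instance = {
    "label": "SUPPORTS",
    "predicted_label": "SUPPORTS",   # assumed field checked by is_correct_label
    "predicted_evidence": [["Page_A", 0]],
    "evidence": [[[None, None, "Page_A", 0], [None, None, "Page_B", 3]]],
}
# is_strictly_correct(instance) would return False here because ["Page_B", 3] is missing;
# adding ["Page_B", 3] to predicted_evidence would make the whole group match and return True.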

@@ -56,7 +56,7 @@ def evidence_macro_precision(instance, max_evidence=None):
all_evi = [[e[2], e[3]] for eg in instance["evidence"] for e in eg if e[3] is not None]

predicted_evidence = instance["predicted_evidence"] if max_evidence is None else \
instance["predicted_evidence"][:max_evidence]
instance["predicted_evidence"][:max_evidence]

for prediction in predicted_evidence:
if prediction in all_evi:
@@ -67,15 +67,16 @@ def evidence_macro_precision(instance, max_evidence=None):

return 0.0, 0.0


def evidence_macro_recall(instance, max_evidence=None):
# We only want to score F1/Precision/Recall of recalled evidence for NEI claims
if instance["label"].upper() != "NOT ENOUGH INFO":
# If there's no evidence to predict, return 1
if len(instance["evidence"]) == 0 or all([len(eg) == 0 for eg in instance]):
return 1.0, 1.0
return 1.0, 1.0

predicted_evidence = instance["predicted_evidence"] if max_evidence is None else \
instance["predicted_evidence"][:max_evidence]
instance["predicted_evidence"][:max_evidence]

for evidence_group in instance["evidence"]:
evidence = [[e[2], e[3]] for e in evidence_group]
@@ -103,7 +104,7 @@ def evidence_micro_precision(instance):
return this_precision, this_precision_hits


def fever_score(predictions,actual=None, max_evidence=5):
def fever_score(predictions, actual=None, max_evidence=5):
correct = 0
strict = 0

@@ -113,10 +114,10 @@ def fever_score(predictions,actual=None, max_evidence=5):
macro_recall = 0
macro_recall_hits = 0

for idx,instance in enumerate(predictions):
for idx, instance in enumerate(predictions):
assert 'predicted_evidence' in instance.keys(), 'evidence must be provided for the prediction'

#If it's a blind test set, we need to copy in the values from the actual data
# If it's a blind test set, we need to copy in the values from the actual data
if 'evidence' not in instance or 'label' not in instance:
assert actual is not None, 'in blind evaluation mode, actual data must be provided'
assert len(actual) == len(predictions), 'actual data and predicted data length must match'
@@ -130,7 +131,7 @@ def fever_score(predictions,actual=None, max_evidence=5):
correct += 1.0

if is_strictly_correct(instance, max_evidence):
strict+=1.0
strict += 1.0

macro_prec = evidence_macro_precision(instance, max_evidence)
macro_precision += macro_prec[0]
@@ -150,4 +151,4 @@ def fever_score(predictions,actual=None, max_evidence=5):

f1 = 2.0 * pr * rec / (pr + rec)

return strict_score, acc_score, pr, rec, f1
return strict_score, acc_score, pr, rec, f1
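
As a quick sanity check, fever_score can be called on a single toy prediction paired with its gold entry. The predicted_label, predicted_evidence, label, and evidence field names follow the FEVER submission format this module assumes, and the page name below is hypothetical; for this perfectly matched example all five returned scores should be 1.0.

# Toy sanity check for fever_score (hypothetical claim data, not from the dataset).
from scorer import fever_score

predictions = [{
    "predicted_label": "SUPPORTS",
    "predicted_evidence": [["Lisbon", 0]],      # [page, line] pairs
}]
actual = [{
    "label": "SUPPORTS",
    "evidence": [[[None, None, "Lisbon", 0]]],  # groups of [annotation_id, evidence_id, page, line]
}]

strict_score, label_accuracy, precision, recall, f1 = fever_score(predictions, actual=actual)
print(strict_score, label_accuracy, precision, recall, f1)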
