
Working on generating Relevant Docs files with TF-IDF
pedrojlazevedo committed Mar 10, 2020
1 parent 97956ca commit e954e1d
Showing 3 changed files with 28 additions and 24 deletions.
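
The commit message refers to generating relevant-document files with TF-IDF, but the retrieval code itself is not part of this diff. Below is a minimal sketch of TF-IDF document ranking with scikit-learn; the function name rank_documents and the toy pages are assumptions for illustration, not this repository's API.

# Minimal TF-IDF ranking sketch (hypothetical helper, not code from this repository).
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def rank_documents(claim, documents, k=5):
    """Return indices of the k documents most similar to the claim under TF-IDF."""
    vectorizer = TfidfVectorizer(stop_words="english")
    doc_matrix = vectorizer.fit_transform(documents)   # one row per document
    claim_vector = vectorizer.transform([claim])       # reuse the document vocabulary
    scores = cosine_similarity(claim_vector, doc_matrix)[0]
    return scores.argsort()[::-1][:k]

# Toy usage with two hypothetical pages.
pages = ["Lisbon is the capital of Portugal.", "The Nile is a river in Africa."]
print(rank_documents("Lisbon is the capital of Portugal", pages, k=1))
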
18 changes: 8 additions & 10 deletions metrics.py
@@ -46,7 +46,7 @@
id : id of the claim
verifiable : boolean of 1 and 0 with respective meaning
docs : set of documents that verify the claim
docs_sep : set of documents seperated
docs_sep : set of documents separated
evidences: list of tuples of <doc, line>
difficulties: list of the number of sentences needed to be evidence
'''
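
For reference, a gold_dict with the fields listed in this docstring might look as follows; the claim id and page name are hypothetical and only show the shape of each field.

# Hypothetical example of the gold_dict structure described in the docstring above.
gold_dict = {
    'id': 137334,                      # claim id (hypothetical)
    'verifiable': 1,                   # 1 = verifiable, 0 = not enough info
    'docs': {'Lisbon'},                # gold documents (multi-doc evidence is joined with spaces)
    'docs_sep': {'Lisbon'},            # the same documents kept separate
    'evidences': {('Lisbon', '0')},    # (document, line) pairs
    'difficulties': [1],               # number of sentences needed per evidence set
}
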
@@ -64,7 +64,7 @@

# get gold inputs
gold_documents = set()
gold_documents_seperated = set()
gold_documents_separated = set()
sentences_pair = set()
evidences = claim['evidence']
difficulties = []
@@ -76,20 +76,20 @@
doc_name += str(e[2])
doc_name += " "
sentences_pair.add((str(e[2]), str(e[3]))) # add gold sentences
gold_documents_seperated.add(str(e[2])) # add the document
gold_documents_separated.add(str(e[2])) # add the document
difficulty += 1
doc_name = doc_name[:-1] # erase the last blank space
else:
doc_name = str(evidence[0][2])
gold_documents_seperated.add(str(evidence[0][2]))
gold_documents_separated.add(str(evidence[0][2]))
sentences_pair.add((str(evidence[0][2]), str(evidence[0][3])))
difficulty = 1
difficulties.append(difficulty)
gold_documents.add(doc_name)
gold_dict['difficulties'] = difficulties
gold_dict['docs'] = gold_documents
gold_dict['evidences'] = sentences_pair
gold_dict['docs_sep'] = gold_documents_seperated
gold_dict['docs_sep'] = gold_documents_separated

gold_data.append(gold_dict)

@@ -194,9 +194,6 @@
if stop == -1:
break

# scores from fever
results = fever_score(train_prediction, actual = train_set)

precision_correct /= total_claim
precision_incorrect /= total_claim
recall_correct /= total_claim
@@ -214,7 +211,6 @@
print("Fall-out: " + str(specificity))
print("Percentage of at least one document found correctly: " + str(doc_found)) # recall


precision_sent_correct /= total_claim
precision_sent_incorrect /= total_claim
recall_sent_correct /= total_claim
@@ -234,6 +230,9 @@
print("Percentage of at least one Sentence found correctly: " + str(sent_found_if_doc_found)) # recall
print("Percentage of at least one Sentence found correctly: " + str(another_sent)) # recall

# scores from fever
results = fever_score(train_prediction, actual=train_set)

print("\n#########")
print("# FEVER #")
print("#########")
@@ -242,4 +241,3 @@
print("Precision: \t\t\t" + str(results[2]))
print("Recall: \t\t\t" + str(results[3]))
print("F1-Score: \t\t\t" + str(results[4]))

5 changes: 5 additions & 0 deletions predict_all.sh
@@ -0,0 +1,5 @@
#!/bin/bash
mkdir -p data
mkdir -p data/fever-data
wget -O data/fever-data/train.jsonl https://s3-eu-west-1.amazonaws.com/fever.public/train.jsonl
wget -O data/fever-data/dev.jsonl https://s3-eu-west-1.amazonaws.com/fever.public/shared_task_dev.jsonl
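
predict_all.sh only downloads the FEVER training and shared-task development splits. A minimal way to read the resulting JSON-lines files (paths taken from the script above; the claim and label fields follow the published FEVER format) might be:

import json

def load_jsonl(path):
    # One JSON object per line, as in the FEVER .jsonl files.
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]

train_set = load_jsonl("data/fever-data/train.jsonl")
dev_set = load_jsonl("data/fever-data/dev.jsonl")
print(len(train_set), train_set[0]["claim"], train_set[0]["label"])
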
29 changes: 15 additions & 14 deletions scorer.py
@@ -1,5 +1,6 @@
import six


def check_predicted_evidence_format(instance):
if 'predicted_evidence' in instance.keys() and len(instance['predicted_evidence']):
assert all(isinstance(prediction, list)
@@ -11,7 +12,7 @@ def check_predicted_evidence_format(instance):
"Predicted evidence must be a list of (page,line) lists"

assert all(isinstance(prediction[0], six.string_types)
for prediction in instance["predicted_evidence"]), \
for prediction in instance["predicted_evidence"]), \
"Predicted evidence must be a list of (page<string>,line<int>) lists"

assert all(isinstance(prediction[1], int)
@@ -24,7 +25,7 @@ def is_correct_label(instance):


def is_strictly_correct(instance, max_evidence=None):
#Strict evidence matching is only for NEI class
# Strict evidence matching is only for NEI class
check_predicted_evidence_format(instance)

if instance["label"].upper() != "NOT ENOUGH INFO" and is_correct_label(instance):
@@ -33,15 +34,14 @@ def is_strictly_correct(instance, max_evidence=None):
if max_evidence is None:
max_evidence = len(instance["predicted_evidence"])


for evidence_group in instance["evidence"]:
#Filter out the annotation ids. We just want the evidence page and line number
# Filter out the annotation ids. We just want the evidence page and line number
actual_sentences = [[e[2], e[3]] for e in evidence_group]
#Only return true if an entire group of actual sentences is in the predicted sentences
# Only return true if an entire group of actual sentences is in the predicted sentences
if all([actual_sent in instance["predicted_evidence"][:max_evidence] for actual_sent in actual_sentences]):
return True

#If the class is NEI, we don't score the evidence retrieval component
# If the class is NEI, we don't score the evidence retrieval component
elif instance["label"].upper() == "NOT ENOUGH INFO" and is_correct_label(instance):
return True
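
is_strictly_correct requires that an entire gold evidence group be contained in the top max_evidence predictions, not just any single gold sentence. A hypothetical instance illustrates the difference; it assumes is_correct_label compares label and predicted_label as in the official FEVER scorer, whose body is outside this diff.

# Hypothetical instance: the gold group needs two sentences, the prediction retrieved only one.
instance = {
    "label": "SUPPORTS",
    "predicted_label": "SUPPORTS",   # assumed field checked by is_correct_label
    "predicted_evidence": [["Page_A", 0]],
    "evidence": [[[None, None, "Page_A", 0], [None, None, "Page_B", 3]]],
}
# is_strictly_correct(instance) would return False here because ["Page_B", 3] is missing;
# adding ["Page_B", 3] to predicted_evidence would make the whole group match and return True.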

@@ -56,7 +56,7 @@ def evidence_macro_precision(instance, max_evidence=None):
all_evi = [[e[2], e[3]] for eg in instance["evidence"] for e in eg if e[3] is not None]

predicted_evidence = instance["predicted_evidence"] if max_evidence is None else \
instance["predicted_evidence"][:max_evidence]
instance["predicted_evidence"][:max_evidence]

for prediction in predicted_evidence:
if prediction in all_evi:
@@ -67,15 +67,16 @@ def evidence_macro_precision(instance, max_evidence=None):

return 0.0, 0.0


def evidence_macro_recall(instance, max_evidence=None):
# We only want to score F1/Precision/Recall of recalled evidence for NEI claims
if instance["label"].upper() != "NOT ENOUGH INFO":
# If there's no evidence to predict, return 1
if len(instance["evidence"]) == 0 or all([len(eg) == 0 for eg in instance]):
return 1.0, 1.0
return 1.0, 1.0

predicted_evidence = instance["predicted_evidence"] if max_evidence is None else \
instance["predicted_evidence"][:max_evidence]
instance["predicted_evidence"][:max_evidence]

for evidence_group in instance["evidence"]:
evidence = [[e[2], e[3]] for e in evidence_group]
@@ -103,7 +104,7 @@ def evidence_micro_precision(instance):
return this_precision, this_precision_hits


def fever_score(predictions,actual=None, max_evidence=5):
def fever_score(predictions, actual=None, max_evidence=5):
correct = 0
strict = 0

@@ -113,10 +114,10 @@ def fever_score(predictions,actual=None, max_evidence=5):
macro_recall = 0
macro_recall_hits = 0

for idx,instance in enumerate(predictions):
for idx, instance in enumerate(predictions):
assert 'predicted_evidence' in instance.keys(), 'evidence must be provided for the prediction'

#If it's a blind test set, we need to copy in the values from the actual data
# If it's a blind test set, we need to copy in the values from the actual data
if 'evidence' not in instance or 'label' not in instance:
assert actual is not None, 'in blind evaluation mode, actual data must be provided'
assert len(actual) == len(predictions), 'actual data and predicted data length must match'
@@ -130,7 +131,7 @@ def fever_score(predictions,actual=None, max_evidence=5):
correct += 1.0

if is_strictly_correct(instance, max_evidence):
strict+=1.0
strict += 1.0

macro_prec = evidence_macro_precision(instance, max_evidence)
macro_precision += macro_prec[0]
@@ -150,4 +151,4 @@ def fever_score(predictions,actual=None, max_evidence=5):

f1 = 2.0 * pr * rec / (pr + rec)

return strict_score, acc_score, pr, rec, f1
return strict_score, acc_score, pr, rec, f1
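
As a quick sanity check, fever_score can be called on a single toy prediction paired with its gold entry. The predicted_label, predicted_evidence, label, and evidence field names follow the FEVER submission format this module assumes, and the page name below is hypothetical; for this perfectly matched example all five returned scores should be 1.0.

# Toy sanity check for fever_score (hypothetical claim data, not from the dataset).
from scorer import fever_score

predictions = [{
    "predicted_label": "SUPPORTS",
    "predicted_evidence": [["Lisbon", 0]],      # [page, line] pairs
}]
actual = [{
    "label": "SUPPORTS",
    "evidence": [[[None, None, "Lisbon", 0]]],  # groups of [annotation_id, evidence_id, page, line]
}]

strict_score, label_accuracy, precision, recall, f1 = fever_score(predictions, actual=actual)
print(strict_score, label_accuracy, precision, recall, f1)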
