Skip to content

Commit

Permalink
Updated to handle metrics via sentence transformers
Browse files Browse the repository at this point in the history
  • Loading branch information
pedrojlazevedo committed Mar 29, 2020
1 parent 5caae2b commit bc8dec7
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 8 deletions.
32 changes: 24 additions & 8 deletions metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
train_predictions_file = "predictions/predictions_train.jsonl"
else: # type_file == 'dev':
train_file = "data/dev.jsonl"
train_relevant_file = "data/dev_concatenation.jsonl"
train_concatenate_file = "data/dev_concatenation_oie.jsonl"
train_relevant_file = "data/dev_relevant_docs.jsonl"
train_concatenate_file = "data/dev_sentence_selection_doc.jsonl"
train_predictions_file = "predictions/new_predictions_dev.jsonl"
else:
print("Needs to have one argument. Choose:")
Expand Down Expand Up @@ -73,22 +73,27 @@

_claim.add_predicted_docs(claim['predicted_pages'])
_claim.add_predicted_sentences(claim['predicted_sentences'])
if "predicted_pages_ner" in claim:
_claim.add_predicted_docs_ner(claim['predicted_pages_ner'])
_claim.add_predicted_sentences_ner(claim['predicted_sentences_ner'])
_claim.add_predicted_sentences_bert(claim['predicted_sentences'])


for claim in train_concatenate:
_id = claim['id']
_claim = Claim.find_by_id(_id)[0]

if "predicted_pages_ner" in claim:
_claim.add_predicted_docs_ner(claim['predicted_pages_ner'])
_claim.add_predicted_sentences_ner(claim['predicted_sentences_ner'])

if not _claim.verifiable:
continue

_claim.add_predicted_sentences_bert(claim['predicted_sentences_bert'])

# _claim.add_predicted_docs_ner(claim['predicted_pages_ner'])
# _claim.add_predicted_sentences_ner(claim['predicted_sentences_ner'])
_claim.add_predicted_docs_oie(claim['predicted_pages_oie'])
if not _claim.check_evidence_found_doc(_type="all"):
print(str(_claim.get_gold_documents()) + " -- " + str(_claim.get_predicted_documents(_type="all")))
# _claim.add_predicted_docs_oie(claim['predicted_pages_oie'])
# if not _claim.check_evidence_found_doc(_type="all"):
# print(str(_claim.get_gold_documents()) + " -- " + str(_claim.get_predicted_documents(_type="all")))


results = Claim.document_retrieval_stats(claims, _type="tfidf")
Expand Down Expand Up @@ -149,6 +154,17 @@
print("Precision (Sentences Retrieved): \t" + str(results[2]))
print("Recall (Relevant Sentences): \t\t" + str(results[3]))

# Sentence-level evidence metrics restricted to the BERT-selected sentences.
# Indices 0/1 of the returned sequence are printed as overall precision/recall,
# indices 2/3 as the same metrics under the "document found" heading.
results = Claim.evidence_extraction_stats(claims, _type="bert")

print("\n################################")
print("# Possible Sentences Only BERT #")
print("################################")
print(f"Precision (Sentences Retrieved): \t{results[0]!s}")
print(f"Recall (Relevant Sentences): \t\t{results[1]!s}")
print("\nIF DOCUMENT WAS FOUND CORRECTLY:")
print(f"Precision (Sentences Retrieved): \t{results[2]!s}")
print(f"Recall (Relevant Sentences): \t\t{results[3]!s}")

results = Claim.evidence_extraction_stats(claims, _type="all")

print("\n###############################")
Expand Down
11 changes: 11 additions & 0 deletions metrics/claim.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def __init__(self, _id, name, verifiable):
self.predicted_docs_oie = []
self.line = []
self.predicted_line = []
self.predicted_evidence_bert = []

def add_gold_evidence(self, document, evidence, line_num):
evidence = Evidence(document, evidence, line_num)
Expand Down Expand Up @@ -58,6 +59,11 @@ def add_predicted_docs_oie(self, docs):
for doc in docs:
self.predicted_docs_oie.append(doc)

def add_predicted_sentences_bert(self, pairs):
    """Append predicted evidence pairs to ``self.predicted_evidence_bert``.

    Every item in ``pairs`` is read at index 0 and index 1; both values are
    converted with ``str`` and stored together as a 2-tuple, in input order.
    Existing entries are kept, so repeated calls accumulate.

    NOTE(review): the two positions presumably hold a document identifier
    and a sentence/line number -- confirm against the caller.

    :param pairs: iterable of indexable items with at least two elements.
    """
    # Tuples of strings are hashable, which lets the stored entries be
    # collected into a set later on.
    self.predicted_evidence_bert.extend(
        (str(pair[0]), str(pair[1])) for pair in pairs
    )

def get_gold_documents(self):
docs = set()
for e in self.gold_evidence:
Expand Down Expand Up @@ -94,12 +100,17 @@ def get_predicted_evidence(self, _type="tfidf"):
elif _type == "ner":
evidences = set(self.predicted_evidence_ner)
return evidences
elif _type == "bert":
evidences = set(self.predicted_evidence_bert)
return evidences
else:
evidences = set()
for e in self.predicted_evidence:
evidences.add(e)
for e in self.predicted_evidence_ner:
evidences.add(e)
for e in self.predicted_evidence_bert:
evidences.add(e)
return evidences

def calculate_correct_docs(self, difficulty="all", _type="tfidf"):
Expand Down

0 comments on commit bc8dec7

Please sign in to comment.