diff --git a/generate_rte_preds.py b/generate_rte_preds.py index 8fc2d678..1cd554dd 100644 --- a/generate_rte_preds.py +++ b/generate_rte_preds.py @@ -76,6 +76,7 @@ def run_rte(claim, evidence, claim_num): for line in full_lines: lines.append(line['content']) + lines[sentence[1]] = lines[sentence[1]].strip() lines[sentence[1]] = lines[sentence[1]].replace("-LRB-", " ( ") lines[sentence[1]] = lines[sentence[1]].replace("-RRB-", " ) ") @@ -88,6 +89,24 @@ def run_rte(claim, evidence, claim_num): zero_results += 1 potential_evidence_sentences.append("Nothing") evidence.append(["Nothing", 0]) + relevant_docs, entities = doc_retrieval.getRelevantDocs(claim, wiki_entities, "spaCy", + nlp) # "spaCy", nlp)# + print(relevant_docs) + # print(entities) + relevant_sentences = sentence_retrieval.getRelevantSentences(relevant_docs, entities, wiki_split_docs_dir) + # print(relevant_sentences) + + predicted_evidence = [] + for sent in relevant_sentences: + predicted_evidence.append((sent['id'], sent['line_num'])) + potential_evidence_sentences.append(sent['sentence']) + evidence.append((sent['id'], sent['line_num'])) + + instances[i]['predicted_pages_ner'] = relevant_docs + instances[i]['predicted_sentences_ner'] = predicted_evidence + + writer_c.write(instances[i]) + print("Claim number: " + str(i) + " of " + str(len(instances))) preds = run_rte(claim, potential_evidence_sentences, claim_num) @@ -106,22 +125,5 @@ def run_rte(claim, evidence, claim_num): claim_num += 1 # print(claim_num) # print(instances[i]) - relevant_docs, entities = doc_retrieval.getRelevantDocs(claim, wiki_entities, "spaCy", - nlp) # "spaCy", nlp)# - print(relevant_docs) - # print(entities) - relevant_sentences = sentence_retrieval.getRelevantSentences(relevant_docs, entities, wiki_split_docs_dir) - # print(relevant_sentences) - - predicted_evidence = [] - for sent in relevant_sentences: - predicted_evidence.append((sent['id'], sent['line_num'])) - - print(predicted_evidence) - instances[i]['predicted_pages_ner'] = relevant_docs - instances[i]['predicted_sentences_ner'] = predicted_evidence - - writer_c.write(instances[i]) - print("Claim number " + str(i) + " of " + str(len(instances))) print("Number of Zero Sentences Found: " + str(zero_results)) diff --git a/metrics.py b/metrics.py index 7287bff5..c791a450 100644 --- a/metrics.py +++ b/metrics.py @@ -20,7 +20,7 @@ train_file = "data/dev.jsonl" train_relevant_file = "data/dev_relevant_docs.jsonl" train_concatenate_file = "data/dev_concatenation.jsonl" - train_predictions_file = "predictions/new_predictions_dev.jsonl" + train_predictions_file = "predictions/new_predictions_dev_ner.jsonl" else: print("Needs to have one argument. Choose:") print("train") @@ -66,7 +66,7 @@ for claim in train_relevant: _id = claim['id'] _claim = Claim.find_by_id(_id)[0] - + _claim.line = claim # no search is needed... no information on gold about retrieval if not _claim.verifiable: continue @@ -152,3 +152,49 @@ print("Precision: \t\t\t" + str(results[2])) print("Recall: \t\t\t" + str(results[3])) print("F1-Score: \t\t\t" + str(results[4])) + +predictions_if_doc_found = [] +claims_if_doc_found = [] + +for claim in train_prediction: + _id = claim['id'] + _claim = Claim.find_by_id(_id)[0] + + if _claim.check_evidence_found_doc(_type="tfidf"): + claims_if_doc_found.append(_claim.line) + predictions_if_doc_found.append(claim) + +# scores from fever +results = fever_score(predictions_if_doc_found, actual=claims_if_doc_found) + +print("\n#######################") +print("# FEVER If Doc Found! #") +print("#######################") +print("Strict_score: \t\t\t" + str(results[0])) +print("Acc_score: \t\t\t" + str(results[1])) +print("Precision: \t\t\t" + str(results[2])) +print("Recall: \t\t\t" + str(results[3])) +print("F1-Score: \t\t\t" + str(results[4])) + +predictions_if_evidence_found = [] +claims_if_evidence_found = [] + +for claim in train_prediction: + _id = claim['id'] + _claim = Claim.find_by_id(_id)[0] + + if _claim.check_evidence_was_found(_type="tfidf"): + claims_if_evidence_found.append(_claim.line) + predictions_if_evidence_found.append(claim) + +# scores from fever +results = fever_score(predictions_if_evidence_found, actual=claims_if_evidence_found) + +print("\n############################") +print("# FEVER If Sentence Found! #") +print("############################") +print("Strict_score: \t\t\t" + str(results[0])) +print("Acc_score: \t\t\t" + str(results[1])) +print("Precision: \t\t\t" + str(results[2])) +print("Recall: \t\t\t" + str(results[3])) +print("F1-Score: \t\t\t" + str(results[4])) diff --git a/metrics/claim.py b/metrics/claim.py index f8587ba0..8aa075fe 100644 --- a/metrics/claim.py +++ b/metrics/claim.py @@ -18,6 +18,8 @@ def __init__(self, _id, name, verifiable): self.predicted_evidence = [] self.predicted_docs_ner = [] self.predicted_evidence_ner = [] + self.line = [] + self.predicted_line = [] def add_gold_evidence(self, document, evidence, line_num): evidence = Evidence(document, evidence, line_num) @@ -112,24 +114,17 @@ def calculate_correct_sentences(self, difficulty="all", _type="tfidf"): def check_evidence_found_doc(self, _type="tfidf"): gold_docs = self.get_gold_documents() - if _type == "tfidf": - for doc in self.predicted_docs: - if doc in gold_docs: - return True - return False - elif _type == "ner": - for doc in self.predicted_docs_ner: - if doc in gold_docs: - return True - return False - else: - for doc in self.predicted_docs: - if doc in gold_docs: - return True - for doc in self.predicted_docs_ner: - if doc in gold_docs: - return True - return False + for doc in self.get_predicted_documents(_type=_type): + if doc in gold_docs: + return True + return False + + def check_evidence_was_found(self, _type="tfidf"): + gold_pairs = self.get_gold_pairs() + for sent in self.get_predicted_evidence(_type=_type): + if sent in gold_pairs: + return True + return False @classmethod def find_by_id(cls, _id): diff --git a/predict_old.py b/predict_old.py index 69cbf8f8..dbfa3d73 100644 --- a/predict_old.py +++ b/predict_old.py @@ -17,9 +17,9 @@ print("#" * 10) print("Parameters should be:\n test_file\n results_file \n concatenate_file\nDefaults being used\n") print("#" * 10) - test_file = "data/subsample_train_relevant_docs.jsonl" + test_file = "data/dev_relevant_docs.jsonl" results_file = "predictions_sanity.jsonl" - concatenate_file = "data/subsample_train_concatenation.jsonl" + concatenate_file = "data/subsample_dev_concatenation.jsonl" nlp = spacy.load('en_core_web_lg') @@ -47,13 +47,13 @@ with jsonlines.open(results_file, mode='w') as writer_r, \ jsonlines.open(concatenate_file, mode='w') as writer_c: for example in test_set: - relevant_docs, entities = doc_retrieval.getRelevantDocs(example['claim'], wiki_entities, "StanfordNER", nlp)#"spaCy", nlp)# + relevant_docs, entities = doc_retrieval.getRelevantDocs(example['claim'], wiki_entities, "spaCy", nlp) relevant_docs = list(set(relevant_docs)) print(example['claim']) - print(relevant_docs) - print(entities) + # print(relevant_docs) + # print(entities) relevant_sentences = sentence_retrieval.getRelevantSentences(relevant_docs, entities, wiki_split_docs_dir) - print(relevant_sentences) + # print(relevant_sentences) for i in range(len(example['predicted_sentences'])): # load document from TF-IDF @@ -85,11 +85,11 @@ 'sentence': lines[example['predicted_sentences'][i][1]] } relevant_sentences.append(temp) - print(relevant_sentences) + # print(relevant_sentences) relevant_docs = relevant_docs + list(example['predicted_pages']) relevant_docs = list(set(relevant_docs)) - print("DOCS: ") - print(relevant_docs) + # print("DOCS: ") + # print(relevant_docs) result = rte.textual_entailment_evidence_retriever(example['claim'], relevant_sentences, claim_id) claim_id = claim_id + 1 final_result = {'id': example['id'], @@ -104,8 +104,9 @@ # introduce extraction information performed by NER example['predicted_pages_ner'] = relevant_docs - example['predicted_sentences_final'] = predicted_evidence + example['predicted_sentences_ner'] = predicted_evidence # save info of predictions based on concatenation writer_c.write(example) writer_r.write(final_result) + print("Claim number: " + str(claim_id) + " of " + str(len(test_set))) diff --git a/train_label_classifier.py b/train_label_classifier.py index 41a72e52..e404542a 100644 --- a/train_label_classifier.py +++ b/train_label_classifier.py @@ -195,8 +195,8 @@ def predict_test(predictions_test, entailment_predictions_test, new_predictions_ entailment_predictions_train = "rte/entailment_predictions_train" predictions_test = "data/dev.jsonl" -entailment_predictions_test = "rte/entailment_predictions" -new_predictions_file = "predictions/new_predictions_dev.jsonl" +entailment_predictions_test = "rte/entailment_predictions_dev_ner" +new_predictions_file = "predictions/new_predictions_dev_ner.jsonl" x_train, y_train = populate_train(gold_train, entailment_predictions_train) # x_test = x_train[7000:]