Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
techofer committed Dec 23, 2024
1 parent e4c1491 commit 623e8a0
Show file tree
Hide file tree
Showing 10,038 changed files with 57 additions and 10,723 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
672 changes: 0 additions & 672 deletions nbs/benchmark/001_covid.ipynb

This file was deleted.

71 changes: 56 additions & 15 deletions nbs/benchmark/covid_bench.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,35 @@
import time

start_time = time.time()

from glob import glob
import pandas as pd
import spacy
from pandas import DataFrame
from pathlib import Path
from spannerlib import get_magic_session,Session,Span
sess = get_magic_session()
from spannerlib import get_magic_session,Session
from spannerlib.ie_func.basic import rgx, rgx_split, rgx_is_match, span_contained, span_arity

VERSION = "SPANNERFLOW"
if VERSION in ["SPANNERFLOW", "SPANNERFLOW_PYTHON_IE"]:
from spannerflow.span import Span
else:
from spannerlib import Span

def is_adjacent(span1,span2):
    """Yield one boolean telling whether ``span2`` directly follows ``span1``.

    The yielded value is true only when both spans carry the same ``name``
    and ``span1.end + 1 == span2.start``. The function is a generator that
    produces exactly one value per call.

    Args:
        span1: object exposing ``name`` and ``end`` attributes.
        span2: object exposing ``name`` and ``start`` attributes.

    Yields:
        The result of the combined name/position comparison.
    """
    names_match = span1.name == span2.name
    # `and` short-circuits: positions are only compared when the names match.
    yield names_match and span2.start == span1.end + 1




import spacy
nlp = spacy.load("en_core_web_sm")

# configurations
slog_file = Path('covid_bench_logic.pl')
input_dir = Path('covid_data/sample_inputs')
data_dir = Path('covid_data/rules_data')

start_time = time.time()


def split_sentence(text):
"""
Expand Down Expand Up @@ -138,10 +153,20 @@ def rewrite_docs(docs,span_label,new_version):
def main(input_dir,data_dir,logic_file, start=0, end=10):
global file_paths
sess = Session()
sess.register('py_rgx', rgx, [str, Span], span_arity)
sess.register('py_rgx_split', rgx_split, [str, Span], [Span,Span])
sess.register('py_rgx_is_match', rgx_is_match, [str, Span], [bool])
sess.register('py_span_contained', span_contained, [Span, Span], [bool])
sess.register('is_adjacent',is_adjacent,[Span,Span],[bool])
# define callback functions
sess.register('split_sentence',split_sentence,[(str,Span)],[Span])
sess.register('pos',pos_annotator,[(Span,str)],[Span,str])
sess.register('lemma',lemmatizer,[(Span,str)],[Span,str])
if VERSION in ["SPANNERFLOW", "SPANNERFLOW_PYTHON_IE"]:
sess.register('split_sentence',split_sentence,[Span],[Span])
sess.register('pos',pos_annotator,[Span],[Span,str])
sess.register('lemma',lemmatizer,[Span],[Span,str])
else:
sess.register('split_sentence',split_sentence,[str],[Span])
sess.register('pos',pos_annotator,[str],[Span,str])
sess.register('lemma',lemmatizer,[str],[Span,str])
sess.register_agg('agg_mention',agg_mention,[str],[str])
sess.register_agg('agg_doc_tags',AggDocumentTags,[str],[str])

Expand Down Expand Up @@ -169,7 +194,10 @@ def main(input_dir,data_dir,logic_file, start=0, end=10):
[p.name,p.read_text(),'raw_text'] for p in file_paths
],columns=['Path','Doc','Version']
)
sess.import_rel('Docs',raw_docs)
if VERSION in ["SPANNERFLOW", "SPANNERFLOW_PYTHON_IE"]:
sess.import_rel('Docs',raw_docs, scheme=[str, Span, str])
else:
sess.import_rel('Docs',raw_docs)

# load logic, note that since we did not define the data relations in the logic file,
# we need to load the logic after the data has been loaded
Expand All @@ -178,19 +206,32 @@ def main(input_dir,data_dir,logic_file, start=0, end=10):
## Rewriting the documents
lemma_tags = sess.export('?Lemmas(P,D,W,L)')
lemma_docs = rewrite_docs(raw_docs,lemma_tags,'lemma')
sess.import_rel('Docs',lemma_docs)
if VERSION in ["SPANNERFLOW", "SPANNERFLOW_PYTHON_IE"]:
sess.import_rel('Docs',lemma_docs, scheme=[str, Span, str])
else:
sess.import_rel('Docs',lemma_docs)


lemma_concept_matches = sess.export('?LemmaConceptMatches(Path,Doc,Span,Label)')
lemma_concepts = rewrite_docs(lemma_docs,lemma_concept_matches,'lemma_concept')
sess.import_rel('Docs',lemma_concepts)
if VERSION in ["SPANNERFLOW", "SPANNERFLOW_PYTHON_IE"]:
sess.import_rel('Docs',lemma_concepts, scheme=[str, Span, str])
else:
sess.import_rel('Docs',lemma_concepts)

pos_concept_matches = sess.export('?PosConceptMatches(P,D,W,L)')
pos_concept_docs = rewrite_docs(lemma_concepts,pos_concept_matches,'pos_concept')
sess.import_rel('Docs',pos_concept_docs)
if VERSION in ["SPANNERFLOW", "SPANNERFLOW_PYTHON_IE"]:
sess.import_rel('Docs',pos_concept_docs, scheme=[str, Span, str])
else:
sess.import_rel('Docs',pos_concept_docs)

target_matches = sess.export('?TargetMatches(P,D,W,L)')
target_rule_docs = rewrite_docs(pos_concept_docs,target_matches,'target_concept')
sess.import_rel('Docs',target_rule_docs)
if VERSION in ["SPANNERFLOW", "SPANNERFLOW_PYTHON_IE"]:
sess.import_rel('Docs',target_rule_docs, scheme=[str, Span, str])
else:
sess.import_rel('Docs',target_rule_docs)

## computing the tags based on the target concept documents
doc_tags = sess.export('?DocumentTags(P,T)')
Expand All @@ -203,9 +244,9 @@ def main(input_dir,data_dir,logic_file, start=0, end=10):

return classification


for i in range(0, 1000, 20):
res = main(input_dir,data_dir,slog_file, start=i, end=i+20)
k = 50
for i in range(0, 100-k, k):
res = main(input_dir,data_dir,slog_file, start=i, end=i+k)
print(res)

end_time = time.time()
Expand Down
2 changes: 1 addition & 1 deletion nbs/benchmark/covid_bench_logic.pl
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

Sents(P,S)<-Docs(P,D,"target_concept"),split_sentence(D)->(S).

SentPairs(P,S1,S2)<-Sents(P,S1),Sents(P,S2),expr_eval("{0}.end +1 == {1}.start",S1,S2)->(True).
SentPairs(P,S1,S2)<-Sents(P,S1),Sents(P,S2),is_adjacent(S1,S2)->(True).

# first we get the covid mentions and their surrounding sentences, using the span_contained ie function
CovidMentions(Path, Span) <- Docs(Path,D,"target_concept"), rgx("COVID-19",D) -> (Span).
Expand Down
1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample100.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1000.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10000.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10001.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10002.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10003.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10004.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10005.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10006.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10007.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10008.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10009.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1001.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10010.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10011.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10012.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10013.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10014.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10015.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10016.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10017.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10018.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10019.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1002.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10020.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10021.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10022.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10023.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10024.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10025.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10026.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10027.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10028.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10029.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1003.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10030.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10031.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10032.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10033.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10034.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10035.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10036.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10037.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10038.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10039.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1004.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10040.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10041.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10042.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10043.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10044.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample10045.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1005.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1006.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1007.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1008.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1009.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample101.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1010.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1011.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1012.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1013.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1014.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1015.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1016.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1017.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1018.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1019.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample102.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1020.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1021.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1022.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1023.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1024.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1025.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1026.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1027.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1028.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1029.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample103.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1030.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1031.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1032.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1033.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1034.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1035.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1036.txt

This file was deleted.

1 change: 0 additions & 1 deletion nbs/benchmark/covid_data/temp/sample1037.txt

This file was deleted.

Loading

0 comments on commit 623e8a0

Please sign in to comment.