Skip to content

Commit

Permalink
updated darmstadt file splits and processing script for test
Browse files Browse the repository at this point in the history
  • Loading branch information
jerbarnes committed Jan 10, 2022
1 parent e28be55 commit 2ac7c3f
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
1 change: 1 addition & 0 deletions data/darmstadt_unis/full_splits.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"train": ["Jones_International_University_4_11-09-2005", "University_of_Phoenix_Online_179_10-05-2004", "DeVry_University_81_08-14-2005", "DeVry_University_16_05-16-2008", "DeVry_University_48_04-20-2007", "DeVry_University_86_02-03-2005", "University_of_Maryland_University_College_19_03-07-2006", "Jones_International_University_1_04-19-2008", "University_of_Phoenix_Online_59_08-05-2007", "DeVry_University_22_02-27-2008", "University_of_Phoenix_Online_178_10-13-2004", "DeVry_University_4_06-29-2008", "University_of_Phoenix_Online_128_11-10-2005", "Capella_University_36_01-30-2007", "DeVry_University_94_07-02-2004", "DeVry_University_69_01-01-2006", "DeVry_University_30_10-26-2007", "Capella_University_5_07-29-2008", "University_of_Phoenix_Online_73_05-26-2007", "Capella_University_84_07-16-2004", "Colorado_Technical_University_Online_74_08-17-2005", "DeVry_University_66_01-14-2006", "DeVry_University_90_11-19-2004", "Colorado_Technical_University_Online_3_05-27-2008", "University_of_Phoenix_Online_139_10-09-2005", "American_InterContinental_University_Online_7_01-04-2007", "DeVry_University_53_04-09-2007", "DeVry_University_62_11-29-2006", "University_of_Phoenix_Online_11_06-08-2008", "Colorado_Technical_University_Online_68_10-15-2005", "University_of_Phoenix_Online_152_08-03-2005", "University_of_Phoenix_Online_1_08-06-2008", "University_of_Phoenix_Online_72_05-31-2007", "Capella_University_47_01-06-2006", "Capella_University_61_08-18-2005", "DeVry_University_32_08-01-2007", "DeVry_University_10_05-22-2008", "DeVry_University_72_10-31-2005", "Colorado_Technical_University_Online_69_10-14-2005", "University_of_Phoenix_Online_120_12-11-2005", "Capella_University_2_08-03-2008", "University_of_Phoenix_Online_133_10-27-2005", "Jones_International_University_6_02-23-2005", "University_of_Phoenix_Online_63_07-02-2007", "DeVry_University_29_11-15-2007", "University_of_Phoenix_Online_151_08-04-2005", "University_of_Phoenix_Online_88_03-29-2007", 
"University_of_Phoenix_Online_74_05-24-2007", "DeVry_University_43_05-09-2007", "St_Leo_University_2_03-24-2007", "Capella_University_39_12-10-2006", "Colorado_Technical_University_Online_23_09-11-2007", "Northcentral_Online_University_1_03-18-2008", "Colorado_Technical_University_Online_37_04-16-2007", "Capella_University_8_07-06-2008", "Colorado_Technical_University_Online_65_10-31-2005", "University_of_Maryland_University_College_10_07-11-2007", "University_of_Phoenix_Online_48_09-27-2007", "University_of_Phoenix_Online_30_12-11-2007", "DeVry_University_67_01-06-2006", "University_of_Phoenix_Online_61_07-28-2007", "Colorado_Technical_University_Online_51_12-15-2006", "Capella_University_59_09-06-2005", "University_of_Maryland_University_College_31_09-12-2004", "Capella_University_60_08-31-2005", "University_of_Phoenix_Online_107_02-28-2006", "University_of_Phoenix_Online_21_02-09-2008", "Capella_University_64_07-23-2005", "Colorado_Technical_University_Online_9_04-07-2008", "DeVry_University_21_03-17-2008", "University_of_Phoenix_Online_8_07-07-2008", "University_of_Maryland_University_College_39_07-25-2004", "University_of_Phoenix_Online_145_09-19-2005", "University_of_Phoenix_Online_130_11-08-2005", "University_of_Phoenix_Online_47_10-03-2007", "University_of_Phoenix_Online_3_07-27-2008", "DeVry_University_63_11-28-2006", "Capella_University_29_05-01-2007", "DeVry_University_88_01-04-2005", "University_of_Maryland_University_College_4_01-03-2008", "American_InterContinental_University_Online_2_03-14-2008", "DeVry_University_42_05-09-2007", "University_of_Phoenix_Online_149_08-15-2005", "Colorado_Technical_University_Online_35_04-25-2007", "Capella_University_65_05-08-2005", "Capella_University_42_03-06-2006", "University_of_Phoenix_Online_90_03-05-2007", "DeVry_University_17_05-10-2008", "Capella_University_16_02-06-2008", "DeVry_University_73_10-18-2005", "University_of_Maryland_University_College_35_08-14-2004", "University_of_Phoenix_Online_80_04-23-2007", 
"University_of_Phoenix_Online_42_10-12-2007", "Colorado_Technical_University_Online_46_02-09-2007", "University_of_Phoenix_Online_187_08-20-2004", "University_of_Phoenix_Online_68_06-07-2007", "Colorado_Technical_University_Online_21_09-13-2007", "University_of_Phoenix_Online_92_01-30-2007", "Capella_University_69_02-01-2005", "Colorado_Technical_University_Online_1_07-10-2008", "Colorado_Technical_University_Online_63_12-12-2005", "Capella_University_41_03-08-2006", "University_of_Phoenix_Online_38_11-05-2007", "University_of_Phoenix_Online_7_07-09-2008", "Colorado_Technical_University_Online_45_03-06-2007", "DeVry_University_56_02-01-2007", "Capella_University_7_07-23-2008", "University_of_Phoenix_Online_112_01-09-2006", "University_of_Phoenix_Online_41_10-14-2007", "Colorado_Technical_University_Online_2_06-11-2008", "DeVry_University_33_07-26-2007", "University_of_Phoenix_Online_189_07-25-2004", "Capella_University_82_07-17-2004", "DeVry_University_1_08-06-2008", "DeVry_University_44_05-09-2007", "University_of_Phoenix_Online_18_04-04-2008", "Colorado_Technical_University_Online_8_04-09-2008", "University_of_Phoenix_Online_106_03-31-2006", "University_of_Phoenix_Online_66_06-21-2007", "Capella_University_18_12-18-2007", "Capella_University_67_02-10-2005", "University_of_Phoenix_Online_85_04-13-2007", "Capella_University_75_10-03-2004", "Colorado_Technical_University_Online_5_05-02-2008", "University_of_Phoenix_Online_115_12-28-2005", "University_of_Phoenix_Online_28_12-20-2007", "Colorado_Technical_University_Online_73_09-16-2005", "DeVry_University_52_04-11-2007", "University_of_Phoenix_Online_60_08-03-2007", "Colorado_Technical_University_Online_40_03-26-2007", "University_of_Phoenix_Online_97_12-20-2006", "University_of_Phoenix_Online_148_08-17-2005", "University_of_Phoenix_Online_31_12-03-2007", "Capella_University_20_11-06-2007", "University_of_Maryland_University_College_32_08-27-2004", "University_of_Maryland_University_College_27_11-02-2005", 
"DeVry_University_51_04-14-2007", "University_of_Phoenix_Online_134_10-27-2005", "Colorado_Technical_University_Online_18_10-03-2007", "University_of_Phoenix_Online_169_01-13-2005", "University_of_Phoenix_Online_17_04-07-2008", "American_InterContinental_University_Online_5_07-09-2007", "Capella_University_73_11-23-2004", "University_of_Phoenix_Online_16_04-30-2008", "University_of_Phoenix_Online_118_12-14-2005", "Colorado_Technical_University_Online_52_12-14-2006", "University_of_Phoenix_Online_24_01-20-2008", "University_of_Phoenix_Online_138_10-10-2005", "Colorado_Technical_University_Online_6_04-25-2008", "Colorado_Technical_University_Online_61_01-28-2006", "Capella_University_14_03-09-2008", "University_of_Phoenix_Online_141_10-05-2005", "Colorado_Technical_University_Online_49_12-31-2006", "Colorado_Technical_University_Online_22_09-13-2007", "University_of_Phoenix_Online_46_10-03-2007", "University_of_Phoenix_Online_125_11-19-2005", "University_of_Maryland_University_College_17_08-03-2006", "DeVry_University_25_12-14-2007", "Capella_University_72_11-23-2004", "University_of_Maryland_University_College_1_06-10-2008", "DeVry_University_36_06-10-2007", "DeVry_University_85_02-22-2005", "University_of_Maryland_University_College_18_04-07-2006", "University_of_Phoenix_Online_135_10-13-2005", "University_of_Phoenix_Online_39_11-02-2007", "University_of_Phoenix_Online_123_12-05-2005", "University_of_Phoenix_Online_54_08-23-2007", "Colorado_Technical_University_Online_36_04-19-2007", "University_of_Phoenix_Online_167_02-03-2005", "University_of_Phoenix_Online_121_12-08-2005", "University_of_Phoenix_Online_89_03-14-2007", "DeVry_University_8_06-05-2008", "Capella_University_22_10-13-2007", "Colorado_Technical_University_Online_13_01-25-2008", "DeVry_University_27_12-08-2007", "University_of_Maryland_University_College_23_12-07-2005", "University_of_Phoenix_Online_15_05-15-2008", "University_of_Phoenix_Online_5_07-21-2008", 
"University_of_Phoenix_Online_180_09-29-2004", "University_of_Phoenix_Online_131_11-08-2005", "Colorado_Technical_University_Online_54_12-07-2006", "University_of_Phoenix_Online_99_11-19-2006", "University_of_Phoenix_Online_101_08-09-2006", "Colorado_Technical_University_Online_34_04-27-2007", "Colorado_Technical_University_Online_33_05-09-2007", "Capella_University_44_01-31-2006", "University_of_Maryland_University_College_29_03-06-2005", "Colorado_Technical_University_Online_39_03-27-2007", "University_of_Phoenix_Online_137_10-10-2005", "University_of_Phoenix_Online_19_03-18-2008", "University_of_Phoenix_Online_87_04-04-2007"], "dev": ["St_Leo_University_4_04-16-2004", "DeVry_University_31_08-05-2007", "University_of_Maryland_University_College_30_10-31-2004", "University_of_Phoenix_Online_27_12-21-2007", "Colorado_Technical_University_Online_76_06-17-2005", "Capella_University_80_09-21-2004", "Capella_University_4_08-01-2008", "University_of_Maryland_University_College_37_07-30-2004", "DeVry_University_15_05-19-2008", "University_of_Phoenix_Online_153_07-27-2005", "Capella_University_55_10-05-2005", "University_of_Phoenix_Online_83_04-16-2007", "University_of_Maryland_University_College_3_01-03-2008", "Colorado_Technical_University_Online_55_09-08-2006", "University_of_Phoenix_Online_185_09-06-2004", "University_of_Phoenix_Online_51_09-10-2007", "Capella_University_50_12-09-2005", "DeVry_University_95_05-16-2004", "Capella_University_40_05-24-2006", "DeVry_University_93_08-08-2004", "University_of_Phoenix_Online_109_02-09-2006", "University_of_Phoenix_Online_174_11-02-2004", "University_of_Phoenix_Online_192_06-15-2004", "University_of_Maryland_University_College_20_03-06-2006"], "test": ["DeVry_University_7_06-07-2008", "Capella_University_79_09-23-2004", "DeVry_University_12_05-20-2008", "Capella_University_54_10-09-2005", "University_of_Phoenix_Online_183_09-08-2004", "DeVry_University_87_01-21-2005", "DeVry_University_24_12-20-2007", 
"University_of_Phoenix_Online_64_07-01-2007", "University_of_Phoenix_Online_70_06-02-2007", "University_of_Phoenix_Online_154_07-25-2005", "Colorado_Technical_University_Online_26_07-21-2007", "University_of_Phoenix_Online_34_11-18-2007", "University_of_Phoenix_Online_124_12-02-2005", "Capella_University_32_03-17-2007", "Capella_University_9_05-18-2008", "University_of_Phoenix_Online_173_11-09-2004", "Capella_University_49_01-01-2006", "University_of_Maryland_University_College_33_08-20-2004", "DeVry_University_82_07-29-2005", "University_of_Phoenix_Online_170_01-07-2005", "University_of_Phoenix_Online_100_10-06-2006", "Colorado_Technical_University_Online_47_02-07-2007", "Colorado_Technical_University_Online_4_05-03-2008", "University_of_Maryland_University_College_11_05-15-2007", "Capella_University_13_03-14-2008"]}
7 changes: 4 additions & 3 deletions data/darmstadt_unis/process_darmstadt.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,10 +453,10 @@ def get_files(current_dir):
if __name__ == "__main__":

basedir = "DarmstadtServiceReviewCorpus"
processed = {"train": [], "dev": []}
processed = {"train": [], "dev": [], "test": []}
corpus = "universities"

with open("splits.json") as infile:
with open("full_splits.json") as infile:
splits = json.load(infile)

current_dir = os.path.join(basedir, corpus)
Expand All @@ -465,12 +465,13 @@ def get_files(current_dir):
# break into train, dev, and test splits
train = [filenames for filenames in ff if filenames[0].split("_words")[0] in splits["train"]]
dev = [filenames for filenames in ff if filenames[0].split("_words")[0] in splits["dev"]]
test = [filenames for filenames in ff if filenames[0].split("_words")[0] in splits["test"]]


# load the list of problematic sentences from problematic.txt
problematic_sentences = [line.strip() for line in open("problematic.txt")]

for subname, subcorpus in [("train", train), ("dev", dev)]:
for subname, subcorpus in [("train", train), ("dev", dev), ("test", test)]:

for bf, mf in subcorpus:
bfile = os.path.join(current_dir, "basedata", bf)
Expand Down

0 comments on commit 2ac7c3f

Please sign in to comment.