Skip to content

Commit

Permalink
Commented and cleaned
Browse files Browse the repository at this point in the history
  • Loading branch information
momandine committed Jul 23, 2013
1 parent cb27e1e commit 6238f69
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 11 deletions.
38 changes: 29 additions & 9 deletions cky_algo.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import json, pdb, math
from prob_generator import ProbGen

#The probability table cache
PI = {}

def main(pg, rawname, destname):
#It should return a list of json encoded trees.
#How do I create a json thingy?
"""The main function takes a ProbGen class, the name of the file with newline separated
sentences of the passages to be analyzed, and the name of the file to be written with the
json encoded trees."""

sentences = get_sentences(rawname)

py_trees = [cky_recursive(sent, pg) for sent in sentences]
Expand All @@ -15,30 +18,38 @@ def main(pg, rawname, destname):


def cky_recursive(sentence, probgen):
    """Parse one sentence with the CKY algorithm.

    Accepts a sentence (list of word strings) and a probability generator
    class; returns whatever cky_help returns for the whole span — a nested
    list of strings representing the most likely tree, paired with its
    (log) probability.
    """
    print(sentence)  # progress trace, one line per sentence
    global PI
    PI = {}  # initialize a fresh dynamic-programming cache per sentence
    # Root tag depends on sentence type: question (ends in '?') vs. declarative.
    # NOTE: the diff had this assignment duplicated; collapsed to one line.
    root = 'SBARQ' if sentence[-1] == '?' else 'S'
    return cky_help(0, len(sentence) - 1, sentence, root, probgen)


def cky_help(i, j, sent, X, pg):
    """Return (subtree, prob) for the best parse of sent[i..j] rooted at X.

    i, j -- inclusive start and end word positions within the sentence
    sent -- the sentence as a list of word strings
    X    -- the nonterminal tag at the root of this subtree
    pg   -- probability generator supplying emission and rule probabilities

    Binary-span results are memoized in the module-level cache PI, which
    cky_recursive resets once per sentence.
    """
    if i == j:  # analyzing a single word - must be a unary (emission) rule
        prob = pg.emm_prob(X, sent[i])
        return [X, sent[i]], prob
    # Binary rule: consult the cache before searching all split points.
    if (i, j, X) not in PI:
        PI[(i, j, X)] = get_max_of_all(i, j, sent, X, pg)
    left, right, prob = PI[(i, j, X)]
    return [X, left, right], prob


def get_max_of_all(i, j, sent, X, pg):
"""Searches through all the combinations of split points and binary rules associated
with the root X, finding the max. Returns null and negative infinity if none found"""

rule_possibilites = pg.binary_counts[X].keys()
best = float("-inf")
Expand All @@ -58,6 +69,7 @@ def get_max_of_all(i, j, sent, X, pg):
right, p_right = cky_help(s+1, j, sent, z, pg)

prob = p_right + p_left + p_rule
<<<<<<< Updated upstream
if i == 0 and j == len(sent) - 1:
print y, z, rule, p_rule, left, p_left, right, p_left

Expand All @@ -68,6 +80,10 @@ def get_max_of_all(i, j, sent, X, pg):
best_right = p_right
RIGHT = right
if prob > best:
=======

if prob > best: #Update
>>>>>>> Stashed changes
best = prob
Y = left
Z = right
Expand All @@ -78,9 +94,9 @@ def get_max_of_all(i, j, sent, X, pg):

return Y, Z, best


#Write a generator!
def get_sentences(rawname):
"""Opens the file of name rawname, returns an array of arrays of strings. The base unit is
a word"""
with open(rawname) as f:
return [line.split() for line in f.readlines()]

Expand All @@ -97,5 +113,9 @@ def write_trees(json_trees, dest_name):
dest.write('\n')

if __name__ == '__main__':
    # NOTE(review): an unresolved git merge conflict was left here; resolved
    # to the newer ("Stashed changes") input/output file names — confirm
    # these are the intended run files before shipping.
    main(ProbGen('new.counts'), 'parse_dev.dat', 'latest_out.dat')

2 changes: 0 additions & 2 deletions parse_dev.key
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,6 @@
["SBARQ", ["WHNP+PRON", "What"], ["SBARQ", ["SQ", ["VERB", "is"], ["NP", ["NP", ["DET", "the"], ["NOUN", "nickname"]], ["PP", ["ADP", "of"], ["NP+NOUN", "Pennsylvania"]]]], [".", "?"]]]
["SBARQ", ["WHNP+PRON", "Who"], ["SBARQ", ["SQ", ["VERB", "is"], ["NP", ["NOUN", "Desmond"], ["NOUN", "Tutu"]]], [".", "?"]]]
["SBARQ", ["WHADVP", ["ADV", "How"], ["ADJ", "fast"]], ["SBARQ", ["SQ", ["VERB", "can"], ["SQ", ["NP", ["DET", "a"], ["NOUN", "Corvette"]], ["VP+VERB", "go"]]], [".", "?"]]]
["SBARQ", ["WHNP+PRON", "What"], ["SBARQ", ["SQ", ["VERB", "are"], ["SQ", ["NP", ["NP", ["NOUN", "John"], ["NP", ["NOUN", "C."], ["NOUN", "Calhoun"]]], ["NP", ["CONJ", "and"], ["NP", ["NOUN", "Henry"], ["NOUN", "Clay"]]]], ["VP", ["VERB", "known"], ["PP+ADP", "as"]]]], [".", "?"]]]
["SBARQ", ["WHADVP+ADV", "When"], ["SBARQ", ["SQ", ["VERB", "was"], ["NP", ["NOUN", "Hurricane"], ["NOUN", "Hugo"]]], [".", "?"]]]
["SBARQ", ["WHADVP+ADV", "When"], ["SBARQ", ["SQ", ["VERB", "did"], ["SQ", ["NP", ["DET", "the"], ["NP", ["ADJ", "Carolingian"], ["NOUN", "period"]]], ["VP+VERB", "begin"]]], [".", "?"]]]
["SBARQ", ["WHADJP", ["ADV", "How"], ["ADJ", "big"]], ["SBARQ", ["SQ", ["VERB", "is"], ["NP+NOUN", "Australia"]], [".", "?"]]]
Expand Down Expand Up @@ -490,7 +489,6 @@
["SBARQ", ["WHADVP+ADV", "When"], ["SBARQ", ["SQ", ["VERB", "was"], ["SQ", ["NP", ["DET", "the"], ["NOUN", "NFL"]], ["VP+VERB", "established"]]], [".", "?"]]]
["SBARQ", ["WHNP+PRON", "What"], ["SBARQ", ["SQ", ["VERB", "are"], ["NP+NOUN", "geckos"]], [".", "?"]]]
["SBARQ", ["WHNP+PRON", "Who"], ["SBARQ", ["SQ", ["VERB", "is"], ["NP", ["NOUN", "Terrence"], ["NOUN", "Malick"]]], [".", "?"]]]
["SBARQ", ["WHNP", ["DET", "What"], ["NP", ["ADJ", "other"], ["NOUN", "name"]]], ["SBARQ", ["SQ", ["VERB", "were"], ["SQ", ["NP+NP", [".", "``"], ["NP", ["DET", "the"], ["NP", ["ADJ", "Little"], ["NP", ["NOUN", "Rascals"], [".", "<<"]]]]], ["VP", ["VERB", "known"], ["PP+ADP", "as"]]]], [".", "?"]]]
["SBARQ", ["WHNP+PRON", "What"], ["SBARQ", ["SQ", ["VERB", "was"], ["NP", ["NP", ["DET", "the"], ["NOUN", "name"]], ["PP", ["ADP", "of"], ["NP", ["DET", "the"], ["NP", [".", "``"], ["NP", ["ADJ", "Little"], ["NP", ["NOUN", "Rascals"], ["NP", [".", "<<"], ["NOUN", "dog"]]]]]]]]], [".", "?"]]]
["SBARQ", ["WHNP", ["WHNP", ["DET", "What"], ["NOUN", "breed"]], ["PP", ["ADP", "of"], ["NP+NOUN", "dog"]]], ["SBARQ", ["SQ", ["VERB", "was"], ["NP", ["DET", "the"], ["NP", [".", "``"], ["NP", ["ADJ", "Little"], ["NP", ["NOUN", "Rascals"], ["NP", [".", "<<"], ["NOUN", "dog"]]]]]]], [".", "?"]]]
["SBARQ", ["WHNP+PRON", "Who"], ["SBARQ", ["SQ+VP", ["VERB", "won"], ["VP", ["NP", ["DET", "the"], ["NP", ["ADJ", "rugby"], ["NP", ["NOUN", "world"], ["NOUN", "cup"]]]], ["PP", ["ADP", "in"], ["NP+NUM", "1987"]]]], [".", "?"]]]
Expand Down

0 comments on commit 6238f69

Please sign in to comment.