Skip to content

Commit

Permalink
Finished commenting, reordering of functions within file
Browse files Browse the repository at this point in the history
  • Loading branch information
momandine committed Jul 15, 2013
1 parent 917a3f1 commit b82cce1
Showing 1 changed file with 16 additions and 14 deletions.
30 changes: 16 additions & 14 deletions replace_rare_tree.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""This script and its member functions can be used to take a JSON-encoded
list of trees, separated by newlines, and a file of the unary rule counts (e.g.
NOUN -> apple) and replace the terminals (apple) with fewer than 5 counts given
a specific tag (NOUN) with '_RARE_'"""
a specific tag (NOUN) with '_RARE_' """

import json
from sys import argv
Expand All @@ -21,13 +21,18 @@ def get_rares(countsname):
if int(count) < 5:
rares.add((word, tag))
return rares

def get_json_trees_from_file(filename):
    """Takes the name of a file with JSON-format lexical trees, one per line,
    and loads them, returning a list of the loaded trees in file order.

    Whitespace-only lines are skipped. A non-blank line that is not valid
    JSON raises ValueError (from json.loads).
    """
    # The context manager closes the handle even if decoding fails part-way.
    with open(filename) as tree_file:
        # A blank line (e.g. a trailing newline at end of file) is not valid
        # JSON, so skip it rather than letting json.loads fail on it.
        return [json.loads(line) for line in tree_file if line.strip()]


def recursive_rr(tree, rareset):
"""Takes a nested list in loaded JSON format and a set of rare (word, tag) pairs
and returns the """ #TODO fix this thingy.
#tree must be a json object
#TODO: Make flexible, make a decorator with two functions as arguments? one for terminals
#one for non-terminals
and returns the """
if len(tree) == 2:
tag = tree[0]
word = tree[1]
Expand All @@ -38,28 +43,25 @@ def recursive_rr(tree, rareset):
recursive_rr(tree[2], rareset)
else:
error("Not lexical tree")
#TODO: Apply to every item in a list more directly? Well that's what list comprehension does
#

def replace_all_trees(jtrees, rareset): #Apply to every tree in list, make more general.
def replace_all_trees(jtrees, rareset):
    """Call recursive_rr on every tree in jtrees, passing rareset to each call."""
    for jtree in jtrees:
        recursive_rr(jtree, rareset)

def write_trees(dest_name, json_trees):
    """Takes a name for the destination file and a list of trees, and writes
    each tree to that file as JSON, one tree per line.

    The destination file is created, or overwritten if it already exists.
    """
    # Closing via `with` guarantees the buffered output is flushed to disk
    # before this function returns, even if serialization raises part-way.
    with open(dest_name, 'w') as dest:
        dest.writelines(json.dumps(tree) + '\n' for tree in json_trees)

def get_json_trees_from_file(filename):
    """Load JSON trees from a file holding one tree per line.

    Returns a list of the decoded trees in file order; whitespace-only lines
    are skipped. A non-blank line that is not valid JSON raises ValueError.
    """
    # Use a context manager so the file handle is always closed.
    with open(filename) as tree_file:
        # Blank lines are not valid JSON documents; ignore them.
        return [json.loads(line) for line in tree_file if line.strip()]

#TODO: Make ordering more logical

if __name__ == "__main__":
"""To run in one go using command line arguments"""

script, counts_file, train_file, dest_file = argv

rares = get_rares(counts_file)
Expand Down

0 comments on commit b82cce1

Please sign in to comment.