Skip to content

Commit

Permalink
Merge pull request #57 from andrewjpage/master
Browse files Browse the repository at this point in the history
unquote tree nodes for fastml and check if too much is filtered out
  • Loading branch information
andrewjpage committed Feb 14, 2013
2 parents 10b7ae0 + 5dfb31f commit 4f2d34d
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions run_gubbins.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,12 @@ def reroot_tree_at_midpoint(tree_name):
tree = dendropy.Tree.get_from_path(tree_name, 'newick',
preserve_underscores=True)
split_all_non_bi_nodes(tree.seed_node)

tree.reroot_at_midpoint(update_splits=True, delete_outdegree_one=False)
tree.deroot()
tree.update_splits()
tree.write_to_path(
tree_name,
'newick',
output_tree_string = tree.as_string(
'newick'
taxon_set=None,
suppress_leaf_taxon_labels=False,
suppress_leaf_node_labels=True,
Expand All @@ -127,7 +126,11 @@ def reroot_tree_at_midpoint(tree_name):
annotations_as_nhx=False,
suppress_item_comments=True,
node_label_element_separator=' ',
node_label_compose_func=None)
node_label_compose_func=None
)
output_file = open(tree_name, 'w+')
output_file.write(output_tree_string.replace('\'', ''))
output_file.closed

def raxml_base_name(base_filename_without_ext, current_time):
    """Return the common filename prefix ``<base>.<current_time>.iteration_``.

    base_filename_without_ext -- string placed at the start of the prefix.
    current_time -- any value; it is converted with ``str()`` before being
        embedded, so integers (e.g. a timestamp) are accepted.

    The result ends in ``.iteration_``; presumably callers append the
    iteration number to it -- confirm against the call sites.
    """
    # The body must be indented under the ``def``; at column 0 the
    # definition has no suite and the module fails to parse.
    return base_filename_without_ext + "." + str(current_time) + ".iteration_"
Expand Down Expand Up @@ -247,6 +250,7 @@ def filter_out_alignments_with_too_much_missing_data(input_filename, output_file
output_handle = open(output_filename, "w+")
alignments = AlignIO.parse(input_handle, "fasta")
output_alignments = []
number_of_included_alignments = 0
for alignment in alignments:
for record in alignment:
number_of_gaps = 0
Expand All @@ -260,10 +264,14 @@ def filter_out_alignments_with_too_much_missing_data(input_filename, output_file
print "Excluded sequence " + record.id + " because there werent enough bases in it"
elif((number_of_gaps*100/sequence_length) <= filter_percentage):
output_alignments.append(record)
number_of_included_alignments += 1
else:
if verbose > 0:
print "Excluded sequence " + record.id + " because it had " + str(number_of_gaps*100/sequence_length) +" percentage gaps while a maximum of "+ str(filter_percentage) +" is allowed"

if number_of_included_alignments <= 1:
sys.exit("Too many sequences have been excluded so theres no data left to work with. Please increase the -f parameter")

AlignIO.write(MultipleSeqAlignment(output_alignments), output_handle, "fasta")
output_handle.close()
input_handle.close()
Expand Down

0 comments on commit 4f2d34d

Please sign in to comment.