Skip to content

Commit

Permalink
Fix overeager Newick quoting
Browse files Browse the repository at this point in the history
Includes regression test for #200
  • Loading branch information
mmore500 committed Jun 2, 2024
1 parent 017918a commit e01833a
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 4 deletions.
3 changes: 2 additions & 1 deletion src/dendropy/dataio/newickwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,8 @@ def _render_node_tag(self, node):
if tag:
tag = nexusprocessing.escape_nexus_token(tag,
preserve_spaces=self.preserve_spaces,
quote_underscores=not self.unquoted_underscores)
quote_underscores=not self.unquoted_underscores,
protect_regex=r'''[()[\],;:'"\0\t\n]''')
return tag
else:
return ""
Expand Down
14 changes: 11 additions & 3 deletions src/dendropy/dataio/nexusprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,17 +470,25 @@ def format_item_annotations_as_comments(
body = separator.join(parts)
return prefix + body + suffix

def escape_nexus_token(label, preserve_spaces=False, quote_underscores=True):
def escape_nexus_token(
label,
preserve_spaces=False,
quote_underscores=True,
protect_regex=r'''[()[\]{}\\\/,;:=*'"`+\-<>\0\t\n]''',
):
"""
Properly protects a NEXUS token.
Kwarg protect_regex allows less eager quoting when working with non-Nexus
Newick strings.
"""
if label is None:
return ""
if not preserve_spaces \
and "_" not in label \
and not re.search(r'''[()[\]{}\\\/,;:=*'"`+\-<>\0\t\n]''', label):
and not re.search(protect_regex, label):
label = label.replace(' ', '_').replace('\t', '_')
elif re.search(r'''[()[\]{}\\\/,;:=*'"`+-<>\0\t\n\r ]''', label) \
elif re.search(protect_regex, label) \
or quote_underscores and "_" in label:
s = label.split("'")
if len(s) == 1:
Expand Down
6 changes: 6 additions & 0 deletions tests/unittests/test_dataio_newick_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ def test_roundtrip_full(self):
# is_coerce_metadata_values_to_string=True,
# is_distinct_nodes_and_edges_representation=False)

def test_quoting_eagerness(self):
    """Regression test for #200: a Newick string must survive a read/write round trip.

    Parses a small tree whose inner clade carries the label ``1.000`` and
    checks that writing it back out as Newick yields exactly the input
    text (ignoring surrounding whitespace).
    """
    source_newick = "(A:0.36827875,B:0.186986942,(C:0.156890492,D:0.175861009)1.000:0.163375071);"
    parsed_tree = dendropy.Tree.get(data=source_newick, schema="newick")
    # The writer's output is stripped before comparing, as the original
    # test did, so only leading/trailing whitespace is tolerated.
    written_newick = parsed_tree.as_string("newick").strip()
    self.assertEqual(source_newick, written_newick)

class NewickTreeWriterGeneralOptionsTests(
compare_and_validate.ValidateWriteable,
dendropytest.ExtendedTestCase):
Expand Down

0 comments on commit e01833a

Please sign in to comment.