diff --git a/src/dendropy/dataio/newickwriter.py b/src/dendropy/dataio/newickwriter.py index 7d3706845..63dabe96b 100644 --- a/src/dendropy/dataio/newickwriter.py +++ b/src/dendropy/dataio/newickwriter.py @@ -378,7 +378,8 @@ def _render_node_tag(self, node): if tag: tag = nexusprocessing.escape_nexus_token(tag, preserve_spaces=self.preserve_spaces, - quote_underscores=not self.unquoted_underscores) + quote_underscores=not self.unquoted_underscores, + protect_regex=r'''[()[\],;:'"\0\t\n]''') return tag else: return "" diff --git a/src/dendropy/dataio/nexusprocessing.py b/src/dendropy/dataio/nexusprocessing.py index 78c9d2876..77740e866 100644 --- a/src/dendropy/dataio/nexusprocessing.py +++ b/src/dendropy/dataio/nexusprocessing.py @@ -470,17 +470,25 @@ def format_item_annotations_as_comments( body = separator.join(parts) return prefix + body + suffix -def escape_nexus_token(label, preserve_spaces=False, quote_underscores=True): +def escape_nexus_token( + label, + preserve_spaces=False, + quote_underscores=True, + protect_regex=r'''[()[\]{}\\\/,;:=*'"`+\-<>\0\t\n]''', +): """ Properly protects a NEXUS token. + + Kwarg protect_regex allows less eager quoting when working with non-Nexus + Newick strings. """ if label is None: return "" if not preserve_spaces \ and "_" not in label \ - and not re.search(r'''[()[\]{}\\\/,;:=*'"`+\-<>\0\t\n]''', label): + and not re.search(protect_regex, label): label = label.replace(' ', '_').replace('\t', '_') - elif re.search(r'''[()[\]{}\\\/,;:=*'"`+-<>\0\t\n\r ]''', label) \ + elif re.search(protect_regex, label) \ or quote_underscores and "_" in label: s = label.split("'") if len(s) == 1: diff --git a/tests/unittests/test_dataio_newick_writer.py b/tests/unittests/test_dataio_newick_writer.py index 82d431d9f..4ecd92f6d 100644 --- a/tests/unittests/test_dataio_newick_writer.py +++ b/tests/unittests/test_dataio_newick_writer.py @@ -143,6 +143,12 @@ def test_roundtrip_full(self): # is_coerce_metadata_values_to_string=True, # is_distinct_nodes_and_edges_representation=False) + def test_quoting_eagerness(self): + # regression test for #200 + nw_str = "(A:0.36827875,B:0.186986942,(C:0.156890492,D:0.175861009)1.000:0.163375071);" + tree = dendropy.Tree.get(data=nw_str,schema="newick") + self.assertEqual(nw_str, tree.as_string("newick").strip()) + class NewickTreeWriterGeneralOptionsTests( compare_and_validate.ValidateWriteable, dendropytest.ExtendedTestCase):