From 82c1bb061eb43af38ab36fdfac805218afec3d84 Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Tue, 31 Mar 2020 09:09:05 -0700 Subject: [PATCH] changing singletons flag behavior --- hivclustering/mtnetwork.py | 11 ++++++----- hivclustering/networkbuild.py | 5 +++-- scripts/hivnetworkcsv | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/hivclustering/mtnetwork.py b/hivclustering/mtnetwork.py index ff1fdec..f6d00be 100644 --- a/hivclustering/mtnetwork.py +++ b/hivclustering/mtnetwork.py @@ -1665,8 +1665,9 @@ def retrieve_clusters(self, singletons=True, key = lambda node: node.cluster_id) clusters[cluster_id] = [] clusters[cluster_id].append(node) + # Remove all clusters with length 1 if not singletons: - clusters.pop(None, None) + clusters = {k:v for k,v in clusters.items() if len(v) > 1} return clusters def sort_clusters (self, singletons=True, filter = None, precomputed_clusters = None, start_id = 1, cluster_key = lambda node: node.cluster_id, set_cluster_id = lambda node, value: setattr(node, "cluster_id", value)): @@ -1800,11 +1801,11 @@ def compute_clusters(self, singletons=False, adjacency_matrix=None): cluster_id = [0] # this will pass the object by reference for node in self.nodes: - if (singletons or node in use_this_am) and node.cluster_id == None: - self.breadth_first_traverse(node, cluster_id, use_this_am) + if (bool(singletons) or node in use_this_am) and node.cluster_id == None: + self.breadth_first_traverse(node, cluster_id, use_this_am, singletons) - def breadth_first_traverse(self, node, cluster_id, use_this_am): - if node.cluster_id == None: + def breadth_first_traverse(self, node, cluster_id, use_this_am, singletons="report"): + if node.cluster_id == None and (node in use_this_am or singletons=="include"): cluster_id[0] += 1 node.cluster_id = cluster_id[0] if node in use_this_am: diff --git a/hivclustering/networkbuild.py b/hivclustering/networkbuild.py index 39b398a..9568006 100755 --- a/hivclustering/networkbuild.py +++ b/hivclustering/networkbuild.py @@ -214,8 +214,9 @@ def describe_network(network, json_output=False, keep_singletons=False): else: print("%d edges on %d nodes" % (network_stats['edges'], network_stats['nodes']), file=sys.stderr) + # If keep_singletons is set, then compute with them on, otherwise don't network.compute_clusters(keep_singletons) - clusters = network.retrieve_clusters() + clusters = network.retrieve_clusters(singletons=(keep_singletons=="include")) singletons = network.extract_singleton_nodes() if json_output: @@ -492,7 +493,7 @@ def build_a_network(extra_arguments = None): json_group.add_argument('-J', '--compact-json', dest = 'compact_json', help='Output the network report as a compact JSON object',required=False, action='store_true', default=False) json_group.add_argument('-j', '--json', help='Output the network report as a JSON object',required=False, action='store_true', default=False) - arguments.add_argument('-o', '--singletons', help='Include singletons in JSON output', action='store_true', default=False) + arguments.add_argument('-o', '--singletons', help='Include singletons in JSON output', choices=['include', 'report'], default='report') arguments.add_argument('-k', '--filter', help='Only return clusters with ids listed by a newline separated supplied file. ', required=False) arguments.add_argument('-s', '--sequences', help='Provide the MSA with sequences which were used to make the distance file. Can be specified multiple times to include mutliple MSA files', required=False, action = 'append') arguments.add_argument('-n', '--edge-filtering', dest='edge_filtering', choices=['remove', 'report'], help='Compute edge support and mark edges for removal using sequence-based triangle tests (requires the -s argument) and either only report them or remove the edges before doing other analyses ', required=False) diff --git a/scripts/hivnetworkcsv b/scripts/hivnetworkcsv index 15823fa..0b2c3bd 100755 --- a/scripts/hivnetworkcsv +++ b/scripts/hivnetworkcsv @@ -66,7 +66,7 @@ def make_hiv_network(): 'contaminants': settings().contaminants } - if settings().singletons: + if bool(settings().singletons): network_info['Settings']['singletons'] = True