Skip to content

Commit

Permalink
add no-dag gctree + other gctree2 paper stuff
Browse files (browse the repository at this point in the history)
  • Loading branch information
psathyrella committed Sep 19, 2024
1 parent 94b2a40 commit 1cd929f
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 8 deletions.
10 changes: 8 additions & 2 deletions bin/gctree-run.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ def install():
cmds += ['micromamba create -n %s python=3.9' % args.env_label] # 3.10 currently has problems with ete
cmds += ['micromamba activate %s' % args.env_label]
cmds += ['micromamba install -c bioconda phylip']
cmds += ['micromamba install -c conda-forge gctree click']
cmds += ['micromamba install -c conda-forge%s click' % ('' if args.no_dag else ' gctree')]
if args.no_dag:
cmds += ['pip install gctree==3.3.0'] # I think having --user makes it install in ~/.local (outside mamba env)
# micromamba remove -n gctree --all # to nuke it and start over
utils.simplerun('\n'.join(cmds) + '\n', cmdfname='/tmp/tmprun.sh', debug=True)

Expand All @@ -76,7 +78,7 @@ def run_gctree():
# ----------------------------------------------------------------------------------------
def get_gctree_cmd():
tcmd = '%s/bin/xvfb-run -a gctree infer outfile abundances.csv --root %s --verbose --idlabel' % (utils.get_partis_dir(), args.root_label) # --idlabel writes the output fasta file
if not args.base_model:
if not args.base_model and not args.no_dag:
tcmd += ' --mutability %s/HS5F_Mutability.csv --substitution %s/HS5F_Substitution.csv' % (args.data_dir, args.data_dir)
if args.ranking_coeffs is not None:
tcmd += ' --ranking_coeffs %s' % (' '.join(c for c in args.ranking_coeffs))
Expand Down Expand Up @@ -205,6 +207,7 @@ def parse_output():
parser.add_argument('--input-forest-dir', help='If set, skips preparatory steps (see --only-write-forest), and looks for \'abundance.csv\' and parsimony forest file (\'outfile\') in the specified dir')
parser.add_argument('--overwrite', action='store_true')
parser.add_argument('--base-model', action='store_true', help='By default, we pass gctree info for the s5f mutation model; if this is set, we don\'t, and it instead use the base model.')
parser.add_argument('--no-dag', action='store_true', help='If set, use old v1 non-DAG gctree version (v3.3.0). Note that this uses a different env (see --env-label)')
parser.add_argument('--ranking-coeffs', nargs='+', help='see gctree help')
parser.add_argument('--branching-process-ranking-coeff', type=int, help='see gctree help')
parser.add_argument('--env-label', default='gctree')
Expand All @@ -226,6 +229,9 @@ def parse_output():
args.actions = utils.get_arg_list(args.actions, choices=['install', 'update', 'run', 'parse'])
args.infname = utils.fpath(args.infname)
args.outdir = utils.fpath(args.outdir)
if args.no_dag:
assert not args.base_model and args.branching_process_ranking_coeff is None and args.ranking_coeffs is None
args.env_label = 'gctree-no-dag'

if 'install' in args.actions:
install()
Expand Down
2 changes: 1 addition & 1 deletion bin/partis
Original file line number Diff line number Diff line change
Expand Up @@ -1444,7 +1444,7 @@ parent_args.append({'name' : '--get-selection-metrics', 'kwargs' : {'action' : '
parent_args.append({'name' : '--min-selection-metric-cluster-size', 'kwargs' : {'type' : int, 'default' : treeutils.default_min_selection_metric_cluster_size, 'help' : 'When calculating selection metrics, ignore clusters smaller than this. See also --min-paired-cluster-size-to-read, which is similar but applies earlier, when reading clusters from files.'}})
parent_args.append({'name' : '--min-paired-cluster-size-to-read', 'kwargs' : {'type' : int, 'default' : treeutils.default_min_selection_metric_cluster_size, 'help' : 'When reading paired annotations when getting selection metrics or plotting partitions, ignore clusters with either N h or l ids smaller than this. See also --min-selection-metric-cluster-size, which is similar but only skips selection metric calculation.'}})
parent_args.append({'name' : '--treefname', 'kwargs' : {'help' : 'newick-formatted file with a tree corresponding to the sequences either in --infname (if making new output, i.e. action is annotate or partition) or --outfname (if reading existing output, i.e. action is get-selection-metrics) (unrelated to --input-simulation-treefname).'}})
parent_args.append({'name' : '--tree-inference-method', 'kwargs' : {'choices' : ['fasttree', 'iqtree', 'iqtree-1.6.beta3', 'iqtree-2.3.1', 'raxml', 'gctree', 'gctree-base', 'gctree-mut-mult', 'linearham', 'igphyml', 'cpath'], 'help' : 'Method to use when inferring trees (default: fasttree)'}})
parent_args.append({'name' : '--tree-inference-method', 'kwargs' : {'choices' : ['fasttree', 'iqtree', 'iqtree-1.6.beta3', 'iqtree-2.3.1', 'raxml', 'gctree', 'gctree-base', 'gctree-mut-mult', 'gctree-no-dag', 'linearham', 'igphyml', 'cpath'], 'help' : 'Method to use when inferring trees (default: fasttree)'}})
parent_args.append({'name' : '--tree-inference-subdir', 'kwargs' : {'help' : 'Subdirectory of the (automatically-set) tree inference workdir to which to write tree inference output files. By default, these are written to a subdir of --outfname/--paired-outdir with the name of the inference method, but this argument allows multiple versions, as with --sub-plotdir.'}})
parent_args.append({'name' : '--infer-trees-with-only-leaves', 'kwargs' : {'action' : 'store_true', 'help' : 'Discard internal nodes in true trees when inferring phylogenetic trees.'}})
parent_args.append({'name' : '--selection-metrics-to-calculate', 'kwargs' : {'default' : 'lbi:lbr:aa-lbi:aa-lbr:cons-dist-aa', 'help' : 'colon-separated list of selection metrics to calculate.'}})
Expand Down
2 changes: 1 addition & 1 deletion bin/run-paired-loci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ bin=./test/cf-paired-loci.py
# methods=igblast:annotate:star-partition:partition:linearham # for test-antn imbal-v3
# methods=partition:single-chain-partis; xstr="--combo-extra-str single-vs-joint-partis"
# methods=scoper:single-chain-scoper; xstr="--combo-extra-str single-vs-joint-scoper" # NOTE this is only for vs-shm (comparing single vs joint); for time-reqd you only need scoper
methods=simu:cache-parameters:partition:write-fake-paired-annotations:replay-plot:iqtree:raxml:igphyml:gctree:gctree-base:gctree-mut-mult:tree-perf # coar stuff cache-parameters:partition
methods=simu:cache-parameters:partition:write-fake-paired-annotations:replay-plot:iqtree:raxml:igphyml:gctree:gctree-mut-mult:tree-perf # coar stuff cache-parameters:partition
astr="--actions $methods" #partition --merge-paired-partitions" #$methods"
# astr="--actions combine-plots --plot-metrics $methods --perf-metrics precision:n-clusters"
# astr="--actions combine-plots --plot-metrics $methods $xstr"
Expand Down
2 changes: 1 addition & 1 deletion projects/igphyml-run.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def install():
cmds += utils.mamba_cmds(args.env_label, only_prep=True)
cmds += ['micromamba create -n %s' % args.env_label]
cmds += ['micromamba activate %s' % args.env_label] # python= 3.6 and 3.9 failed, so i let it choose, it chose 3.5 which seems to work
cmds += ['micromamba install -c bioconda changeo']
cmds += ['micromamba install -c bioconda -c conda-forge changeo']
# micromamba remove --all -n args.env_label # to nuke it and start over
cmds += ['cd packages']
cmds += ['git clone https://bitbucket.org/kleinstein/igphyml']
Expand Down
2 changes: 1 addition & 1 deletion python/partitiondriver.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ def read_existing_output(self, outfname=None, ignore_args_dot_queries=False, rea
seqfileopener.add_input_metafo(self.input_info, annotation_list, keys_not_to_overwrite=['multiplicities', 'paired-uids']) # these keys are modified by sw (multiplicities) or paired clustering (paired-uids), so if you want to update them with this action here you're out of luck
if tmpact == 'update-meta-info' or (tmpact == 'get-selection-metrics' and self.args.add_selection_metrics_to_outfname):
print(' rewriting output file with %s: %s' % ('newly-calculated selection metrics' if tmpact=='get-selection-metrics' else 'updated input meta info', outfname))
if self.args.add_selection_metrics_to_outfname and self.args.tree_inference_method == 'gctree':
if self.args.add_selection_metrics_to_outfname and 'gctree' in self.args.tree_inference_method:
print(' %s writing gctree annotations (with inferred ancestral sequences added) to original output file, which means that if you rerun gctree things may crash/be messed up since the inferred ancestral sequences are already in the annotation' % utils.wrnstr())
self.write_output(annotation_list, set(), cpath=cpath, dont_write_failed_queries=True, extra_headers=extra_headers) # I *think* we want <dont_write_failed_queries> set, because the failed queries should already have been written, so now they'll just be mixed in with the others in <annotation_list>

Expand Down
2 changes: 2 additions & 0 deletions python/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,7 @@ def interpolate_values(xvals, yvals):
'gctree-tree-perf' : 'GCtree',
'gctree-base-tree-perf' : 'GCtree (base)',
'gctree-mut-mult-tree-perf' : 'GCtree (context)',
'gctree-no-dag-tree-perf' : 'GCtree (no DAG)',
# '0.1-true-singletons' : '10% random singletons',
# '0.1-true-reassign' : '10% random reassign',
'misassign-0.60-singletons' : 'synth. 60%\nsingleton',
Expand Down Expand Up @@ -777,6 +778,7 @@ def interpolate_values(xvals, yvals):
'gctree-tree-perf' : dmp[3],
'gctree-base-tree-perf' : dmp[3],
'gctree-mut-mult-tree-perf' : dmp[3],
'gctree-no-dag-tree-perf' : 'grey',
'enclone' : 'green',
'mixcr' : '#2b65ec',
'misassign-0.60-singletons' : '#808080',
Expand Down
4 changes: 3 additions & 1 deletion python/treeutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
# default_lb_tau = 0.0025
# default_lbr_tau_factor = 1
default_min_selection_metric_cluster_size = 10
gct_methods = ['gctree', 'gctree-base', 'gctree-mut-mult']
gct_methods = ['gctree', 'gctree-base', 'gctree-mut-mult', 'gctree-no-dag']
iqt_methods = ['iqtree', 'iqtree-1.6.beta3', 'iqtree-2.3.1']
inf_anc_methods = ['raxml', 'linearham', 'igphyml'] + gct_methods + iqt_methods # methods that infer ancestors
all_phylo_methods = ['fasttree', 'raxml', 'linearham', 'igphyml'] + gct_methods + iqt_methods
Expand Down Expand Up @@ -1074,6 +1074,8 @@ def getcmd(workdir):
cmd += ' --base-model'
if method == 'gctree-mut-mult':
cmd += ' --ranking-coeffs 0 -1 0 --branching-process-ranking-coeff 0'
if method == 'gctree-no-dag':
cmd += ' --no-dag'
if only_pass_leaves:
cmd += ' --expand-all-nodes' # these aren't actually the same, but if we're only passing leaves, it's probably because we want all input seqs to end up as leaves, so we want to expand all of em
elif method == 'linearham':
Expand Down
2 changes: 1 addition & 1 deletion test/cf-paired-loci.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
all_perf_metrics += pcfrac_metrics
synth_actions = ['synth-%s'%a for a in ['distance-0.00', 'distance-0.005', 'distance-0.01', 'distance-0.02', 'distance-0.03', 'reassign-0.10', 'singletons-0.40', 'singletons-0.20']]
ptn_actions = ['partition', 'partition-lthresh', 'star-partition', 'vsearch-partition', 'annotate', 'vjcdr3-0.9', 'vjcdr3-0.8', 'scoper', 'mobille', 'igblast', 'linearham', 'enclone'] + synth_actions # using the likelihood (rather than hamming-fraction) threshold makes basically zero difference
phylo_actions = ['iqtree', 'raxml', 'gctree', 'gctree-base', 'gctree-mut-mult', 'igphyml'] # , 'iqtree-1.6.beta3', 'iqtree-2.3.1'
phylo_actions = ['iqtree', 'raxml', 'gctree', 'gctree-mut-mult', 'gctree-no-dag', 'igphyml'] # , 'iqtree-1.6.beta3', 'iqtree-2.3.1' # , 'gctree-base'
tree_perf_actions = ['%s-tree-perf'%a for a in phylo_actions] # it would be really nice to run tree perf during the phylo action, but i can't figure out a good way to do that (main problem is getting access to both true and inferred annotations in a sensible way)
after_actions = ['replay-plot', 'cache-parameters', 'merge-paired-partitions', 'get-selection-metrics', 'parse-linearham-trees', 'write-fake-paired-annotations', 'tree-perf'] + ptn_actions + phylo_actions + tree_perf_actions # actions that come after simulation (e.g. partition)
plot_actions = ['single-chain-partis', 'single-chain-scoper']
Expand Down

0 comments on commit 1cd929f

Please sign in to comment.