-
Notifications
You must be signed in to change notification settings - Fork 0
/
mcmc_gene_content_original.Rev
102 lines (73 loc) · 3.02 KB
/
mcmc_gene_content_original.Rev
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
###########################################################################
#
# RevBayes Script: Genome Gene Content Based Phylogenies
#
#
# author: Sebastian Hoehna
#
###########################################################################
setOption("useScaling","TRUE")
setOption("printNodeIndex","FALSE")
### Read in gene content data
#data <- readDiscreteCharacterData("sample (3).nexus")
#data <- readDiscreteCharacterData("meta23-homo.nex")
data <- readDiscreteCharacterData( "data/" + DATASET + ".nexus")
# Get some useful variables from the data. We need these later on.
n_species <- data.ntaxa()
n_branches <- 2*n_species - 3
taxa <- data.taxa()
moves = VectorMoves()
monitors = VectorMonitors()
######################
# Substitution matrix
######################
# prior for the stationary frequency is Beta(1,1)
pi_prior <- v(1,1)
pi ~ dnDirichlet(pi_prior)
moves.append( mvSimplexElementScale(pi, weight=5, alpha=10) )
# Create a deterministic variable for the rate matrix
Q := fnFreeBinary(transition_rates=pi,rescaled=true)
####################################
# Across sites rate variation
####################################
# Prior for shape parameter is Uniform(0,1E7)
alpha ~ dnUniform(0,1E7)
alpha.setValue(1.0)
moves.append( mvScale(alpha, weight=2.0, lambda=0.2) )
# Create a discretized Gamma(alpha,alpha) distribution with 4 rate categories
gamma_rates := fnDiscretizeGamma( alpha, alpha, 4, true )
####################################
# Hierarchical branch length prior
####################################
# Prior mean branch length mu is Exp(10)
mu ~ dnExponential(10)
mu.setValue( 0.1 )
#moves.append( mvScale(mu, weight=1.0, lambda=0.3) )
# branch lengths are iid Exp(1/mu)
rec_mu := 1/mu
psi ~ dnUniformTopologyBranchLength( dnExp(rec_mu), taxa )
moves.append( mvSPR(psi, weight=n_branches/2.0) )
moves.append( mvNNI(psi, weight=n_branches) )
moves.append( mvBranchLengthScale(psi, weight=n_branches) )
####################################
# Substitution model
####################################
# Initialize the substitution model
seq ~ dnPhyloCTMC(tree=psi, Q=Q, siteRates=gamma_rates, type="Restriction", coding="noabsencesites|nosingletonpresence")
#seq ~ dnPhyloDolloCTMC(tree=psi, siteRates=gamma_rates, coding="noabsencesites|nosingletonpresence")
# Attach the data
seq.clamp(data)
####################################
# MCMC
####################################
# Initialize the DAG model
mymodel = model(Q)
# Initialize the monitors
monitors.append( mnModel(filename="output/"+DATASET+".log",printgen=1, separator = TAB) )
monitors.append( mnStochasticVariable(filename="output/"+DATASET+".var",printgen=1, separator = TAB) )
monitors.append( mnFile(filename="output/"+DATASET+".trees",printgen=1, separator = TAB, posterior=false, likelihood=false, prior=false, psi) )
monitors.append( mnScreen(printgen=100, alpha, mu, pi) )
# Initialize and run the MCMC
mymcmc = mcmc(mymodel, monitors, moves, nruns=N_REPS, combine="mixed", moveschedule="random")
mymcmc.run(generations=NUM_MCMC_ITERATIONS, tuningInterval=200)
q()