-
Notifications
You must be signed in to change notification settings - Fork 44
dishonest_comp
Paul Lott edited this page Nov 23, 2013
·
13 revisions
#Comparison of Dishonest Casino Models in StochHMM, R-HMM, Mamot, and HMMoc
##StochHMM Model
#STOCHHMM MODEL FILE
MODEL INFORMATION
======================================================
MODEL_NAME: CASINO DICE MODEL
MODEL_DESCRIPTION: Taken from CH3 Durbin/Eddy
MODEL_CREATION_DATE: August 28,2009
TRACK SYMBOL DEFINITIONS
======================================================
DICE: 1,2,3,4,5,6
STATE DEFINITIONS
#############################################
STATE:
NAME: INIT
TRANSITION: STANDARD: P(X)
FAIR: 0.5
LOADED: 0.5
#############################################
STATE:
NAME: FAIR
PATH_LABEL: F
GFF_DESC: FAIR
TRANSITION: STANDARD: P(X)
FAIR: 0.95
LOADED: 0.05
END: 1
EMISSION: DICE: P(X)
ORDER: 0
@1 2 3 4 5 6
0.167 0.167 0.167 0.167 0.167 0.167
#############################################
STATE:
NAME: LOADED
PATH_LABEL: L
GFF_DESC: LOADED
TRANSITION: STANDARD: P(X)
FAIR: 0.1
LOADED: 0.9
END: 1
EMISSION: DICE: P(X)
ORDER: 0
@1 2 3 4 5 6
0.1 0.1 0.1 0.1 0.1 0.5
#############################################
//END
##R HMM Model Adapted from R HMM Library
library(HMM)
args = commandArgs(trailingOnly = TRUE);
#Name of dice roll file (which is comma separated)
seqFile = args[1];
#Flag for Viterbi (if 1 then perform viterbi, otherwise perform posterior)
viterbi = as.numeric(args[2])
alphabet = c(1, 2, 3, 4, 5, 6)
states = c("Fair", "Loaded")
#State Emissions
fair = c(1/6, 1/6, 1/6, 1/6, 1/6, 1/6)
loaded = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.5)
#Transition matrix
transProbs = matrix(c(0.99, 0.01, 0.02, 0.98), c(2,2), byrow = TRUE)
#Emission Matrix
emissionProbs = matrix(c(fair, loaded), c(2, 2), byrow = TRUE)
hmm = initHMM(states, alphabet, transProbs = transProbs, emissionProbs = emissionProbs)
observations = scan(file = seqFile,sep=",")
if (viterbi == 1){
vit = viterbi(hmm, observations)
}
if (viterbi == 0){
f = forward(hmm, observations)
b = backward(hmm, observations)
i <- f[1, length(observations)]
j <- f[2, length(observations)]
probObservations = (i + log(1 + exp(j - i)))
posterior = exp((f + b) - probObservations)
}
gc()
# HMM for the "dishonest casino" presented by Durbin, Eddy, Krogh, Mitchison
# in "Biological sequence analysis" (Cambridge University Press, 1998).
# Exercise 1.1, p. 6 and p. 54, 56, 59, 61, 65
Alphabet: abcdef
################################################
State BEGIN
E: 0 0 0 0 0 0
T: F 1
# Fair
State F
E: 0.1666 0.1666 0.1666 0.1666 0.1666 0.1666
T: F 0.95 L 0.049 END 0.001
# not fair
State L
E: 0.1 0.1 0.1 0.1 0.1 0.5
T: F 0.1 L 0.9
State END
E: 0 0 0 0 0 0
T: END 0
################################################
NULLMODEL: 0.1666 0.1666 0.1666 0.1666 0.1666 0.1666
<?xml version="1.0"?>
<!--
This file is part of HMMoC 1.3, a hidden Markov model compiler.
Copyright (C) 2007 by Gerton Lunter, Oxford University.
HMMoC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
HMMOC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with HMMoC; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-->
<hml debug="true">
<author>Gerton Lunter</author>
<alphabet id="dice">
123456
</alphabet>
<output id="sequence">
<alphabet idref="dice"/>
<identifier type="length" value="iLen"/>
<identifier type="sequence" value="aSeq"/>
<code type="parameter" value="char *aSeq"/>
<code type="parameter" value="int iLen"/>
</output>
<hmm id="Casino">
<description> The occasionally dishonest casino </description>
<outputs id="casinooutputs">
<output idref="sequence"/>
</outputs>
<clique id="block1">
<state id="start"/>
</clique>
<clique id="block2">
<state id="honest"/>
<state id="dishonest"/>
</clique>
<clique id="block3">
<state id="end"/>
</clique>
<graph>
<clique idref="block1"/>
<clique idref="block2"/>
<clique idref="block3"/>
</graph>
<transitions>
<transition from="start" to="honest" probability="one" emission="emitHonest"/>
<transition from="honest" to="honest" probability="stayHonest" emission="emitHonest"/>
<transition from="honest" to="dishonest" probability="goDishonest" emission="emitDishonest"/>
<transition from="dishonest" to="dishonest" probability="stayDishonest" emission="emitDishonest"/>
<transition from="dishonest" to="honest" probability="goHonest" emission="emitHonest"/>
<transition from="honest" to="end" probability="goStop" emission="empty"/>
<transition from="dishonest" to="end" probability="goStop" emission="empty"/>
</transitions>
<code id="paramsClassDef" where="classdefinitions">
<![CDATA[
struct Params {
double iGoHonest;
double iGoDishonest;
double iGoStop;
double aEmitDishonest[6];
};
]]>
</code>
<emission id="empty">
<probability>
<code type="expression"> 1.0 </code>
</probability>
</emission>
<emission id="emitHonest">
<output idref="sequence"/>
<probability>
<code type="statement">
<identifier output="sequence" value="iEmission"/>
<identifier type="result" value="iProb"/>
<![CDATA[
iProb = 1/6.0;
/* This probability does not depend on the symbol. HMMoC warns if it does not see the label 'iEmission'
somewhere in the code -- its appearance in this comment stops it from warning */
]]>
</code>
</probability>
</emission>
<emission id="emitDishonest">
<output idref="sequence"/>
<probability>
<code type="statement">
<identifier output="sequence" value="iEmission"/>
<identifier type="result" value="iProb"/>
<!-- Here goes the code computing the probability -->
<![CDATA[
iProb = iPar.aEmitDishonest[ iEmission - '1' ];
]]>
</code>
</probability>
</emission>
<probability id="one"><code> 1.0 </code></probability>
<probability id="goDishonest">
<code>
<!-- Tell HMMoC that this code requires an input parameter, which itself need a definition to make sense -->
<code type="parameter" init="paramsClassDef" value="Params iPar"/>
<!-- The actual code for this probability follows (no need to quote this) -->
iPar.iGoDishonest
</code>
</probability>
<probability id="goHonest"><code> iPar.iGoHonest </code></probability>
<probability id="goStop"><code> iPar.iGoStop </code></probability>
<probability id="stayHonest"><code> 1.0 - iPar.iGoDishonest - iPar.iGoStop </code></probability>
<probability id="stayDishonest"><code> 1.0 - iPar.iGoHonest - iPar.iGoStop </code></probability>
</hmm>
<!-- Code generation -->
<forward outputTable="yes" name="Forward" id="fw">
<!-- Specify HMM to make code for -->
<hmm idref="Casino"/>
</forward>
<backward outputTable="yes" baumWelch="yes" name="Backward" id="bw">
<!-- Specify HMM to make code for -->
<hmm idref="Casino"/>
</backward>
<viterbi name="Viterbi" id="vit">
<hmm idref="Casino"/>
</viterbi>
<codeGeneration realtype="bfloat" file="casino.cc" header="casino.h" language="C++">
<forward idref="fw"/>
<backward idref="bw"/>
<viterbi idref="vit"/>
</codeGeneration>
</hml>
##HMMoc Viterbi Code
/*
* This file is part of HMMoC 1.3, a hidden Markov model compiler.
* Copyright (C) 2007 by Gerton Lunter, Oxford University.
*
* HMMoC is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* HMMOC is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HMMoC; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
\*/
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <string>
#include "casino.h"
void import_fasta(char* file,std::string& seq){
std::ifstream fa;
fa.open(file);
if (!fa.good()){
return;
}
std::string header;
getline(fa,header,'\n');
std::string line;
while(getline(fa,line,'\n')){
if (line[0] == ' ' || line[0]=='>'){
continue;
}
seq += line;
}
return;
}
int main(int argc, char** argv) {
// The parameters of the model
Params iPar;
// Fill it with some values
iPar.iGoDishonest = 0.05; // probability of going from Honest to the Dishonest state
iPar.iGoHonest = 0.02; // probability of going from Dishonest to the Honest state
iPar.iGoStop = 0.00001; // probability of going from either to the End state
iPar.aEmitDishonest[0] = 0.1;
iPar.aEmitDishonest[1] = 0.1;
iPar.aEmitDishonest[2] = 0.1;
iPar.aEmitDishonest[3] = 0.1;
iPar.aEmitDishonest[4] = 0.1;
iPar.aEmitDishonest[5] = 0.5; // Probability of throwing a 6 is heavily favoured in the Dishonest state
//Import Sequence
std::string aSequence;
import_fasta(argv[1],aSequence);
int iPathLength=aSequence.length();
char* seq = new char[iPathLength+1];
strcpy(seq, aSequence.c_str());
aSequence="";
// Decode the emission sequence using Viterbi, and compute posteriors and Baum Welch counts using Forward and Backward
CasinoDPTable *pViterbiDP;
cout << "Calculating Viterbi probability..." << endl;
bfloat iVitProb = Viterbi_recurse(&pViterbiDP, iPar, seq, iPathLength );
cout << "Viterbi: "<<iVitProb<<endl;
cout << "Calculating Viterbi path..." << endl;
Path& iViterbiPath = Viterbi_trace(pViterbiDP, iPar, seq, iPathLength );
// Compare the true and Viterbi paths, and print the posterior probability of being in the honest state
int iVHonest = pViterbiDP->getId("honest");
for (int i=0; i<iPathLength; i++) {
if (iViterbiPath.toState(i) == iVHonest) {
cout << "H";
} else {
cout << "D";
}
}
std::cout << std::endl;
}
##HMMoc Posterior Code
/*
* This file is part of HMMoC 1.3, a hidden Markov model compiler.
* Copyright (C) 2007 by Gerton Lunter, Oxford University.
*
* HMMoC is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* HMMOC is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HMMoC; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
\*/
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include "casino.h"
void import_fasta(char* file,std::string& seq){
std::ifstream fa;
fa.open(file);
if (!fa.good()){
return;
}
std::string header;
getline(fa,header,'\n');
std::string line;
while(getline(fa,line,'\n')){
if (line[0] == ' ' || line[0]=='>'){
continue;
}
seq += line;
}
return;
}
int main(int argc, char** argv) {
// The parameters of the model
Params iPar;
// Fill it with some values
iPar.iGoDishonest = 0.05; // probability of going from Honest to the Dishonest state
iPar.iGoHonest = 0.02; // probability of going from Dishonest to the Honest state
iPar.iGoStop = 0.00001; // probability of going from either to the End state
iPar.aEmitDishonest[0] = 0.1;
iPar.aEmitDishonest[1] = 0.1;
iPar.aEmitDishonest[2] = 0.1;
iPar.aEmitDishonest[3] = 0.1;
iPar.aEmitDishonest[4] = 0.1;
iPar.aEmitDishonest[5] = 0.5; // Probability of throwing a 6 is heavily favoured in the Dishonest state
std::string aSequence;
import_fasta(argv[1],aSequence);
int iPathLength=aSequence.length();
char* seq = new char[iPathLength+1];
strcpy(seq, aSequence.c_str());
aSequence="";
// Decode the emission sequence using Viterbi, and compute posteriors and Baum Welch counts using Forward and Backward
CasinoDPTable *pViterbiDP, *pFWDP, *pBWDP;
CasinoBaumWelch bw;
cout << "Calculating Forward probability..." << endl;
bfloat iFWProb = Forward(&pFWDP, iPar, seq, iPathLength );
cout << "Forward: "<<iFWProb<<endl;
cout << "Calculating Backward probability..." << endl;
bfloat iBWProb = Backward(bw, pFWDP, &pBWDP, iPar, seq, iPathLength );
cout << "Backward:"<<iBWProb<<endl;
// Compare the true and Viterbi paths, and print the posterior probability of being in the honest state
int iVHonest = pViterbiDP->getId("honest");
for (int i=0; i<iPathLength; i++) {
double iPosterior = pFWDP->getProb("honest",i+1)*pBWDP->getProb("honest",i+1)/iFWProb;
cout << " " << iPosterior << endl;
}
}