-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrace.c
320 lines (289 loc) · 8.91 KB
/
trace.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
/************************************************************
* HMMER - Biological sequence analysis with HMMs
* Copyright 1992-1995 Sean R. Eddy
*
* This source code is distributed under the terms of the
* GNU General Public License. See the files COPYING and
* GNULICENSE for details.
*
************************************************************/
/* trace.c
* Tue Mar 8 16:29:22 1994
*
* Stuff to do with tracebacks.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#include "squid.h"
#include "states.h"
#include "externs.h"
#ifdef MEMDEBUG
#include "dbmalloc.h"
#endif
/* Function: AllocTrace(), ReallocTrace(), FreeTrace()
*
* Purpose: allocation and freeing of traceback structures
*/
/* AllocTrace()
 *
 * Create a traceback structure whose nodeidx, statetype and rpos
 * arrays each have room for tlen entries, and pass it back through
 * ret_tr. The tlen field of the new structure is not set here.
 * Any failed allocation is reported with Die("malloc failed").
 */
void
AllocTrace(int tlen, struct trace_s **ret_tr)
{
  struct trace_s *fresh;

  /* the structure itself */
  fresh = malloc(sizeof *fresh);
  if (fresh == NULL)
    Die("malloc failed");

  /* its three parallel arrays, one slot per trace position */
  if ((fresh->nodeidx   = malloc(tlen * sizeof *fresh->nodeidx))   == NULL ||
      (fresh->statetype = malloc(tlen * sizeof *fresh->statetype)) == NULL ||
      (fresh->rpos      = malloc(tlen * sizeof *fresh->rpos))      == NULL)
    Die("malloc failed");

  *ret_tr = fresh;
}
/* ReallocTrace()
 *
 * Resize the nodeidx, statetype and rpos arrays of an existing
 * traceback so that each holds tlen entries. The arrays are resized
 * in that order; the first failure stops the sequence and is
 * reported with Die("malloc failed").
 */
void
ReallocTrace(struct trace_s *tr, int tlen)
{
  tr->nodeidx = realloc(tr->nodeidx, tlen * sizeof *tr->nodeidx);
  if (tr->nodeidx != NULL)
    {
      tr->statetype = realloc(tr->statetype, tlen * sizeof *tr->statetype);
      if (tr->statetype != NULL)
        {
          tr->rpos = realloc(tr->rpos, tlen * sizeof *tr->rpos);
          if (tr->rpos != NULL)
            return;             /* all three arrays resized */
        }
    }

  /* reached only when one of the three resizes failed */
  Die("malloc failed");
}
/* FreeTrace()
 *
 * Release a traceback: its nodeidx, statetype and rpos arrays, then
 * the structure itself. A NULL tr is accepted and ignored, in the
 * same way free(NULL) is a no-op, so cleanup paths can call this
 * unconditionally.
 */
void
FreeTrace(struct trace_s *tr)
{
  if (tr == NULL)
    return;

  free(tr->nodeidx);
  free(tr->statetype);
  free(tr->rpos);
  free(tr);
}
/* Function: ReverseTrace()
 *
 * Purpose:  Tracebacks are built back to front in arrays that may be
 *           larger than needed. This routine copies the first tlen
 *           entries of each array, in reverse order, into newly
 *           allocated arrays of exactly tlen entries, frees the old
 *           arrays, and records tlen in the structure.
 *
 * Args:     tr   - the traceback to reverse (modified in place)
 *           tlen - actual length of the traceback; on entry END is
 *                  at index 0 and BEGIN at index tlen-1
 *
 * Return:   (void). Allocation failure is reported with
 *           Die("malloc failed").
 */
void
ReverseTrace(struct trace_s *tr, int tlen)
{
  int  *rev_node;
  char *rev_type;
  int  *rev_rpos;
  int   src;                    /* index into the old arrays, counts down */
  int   dst;                    /* index into the new arrays, counts up   */

  if ((rev_node = malloc(tlen * sizeof *rev_node)) == NULL ||
      (rev_type = malloc(tlen * sizeof *rev_type)) == NULL ||
      (rev_rpos = malloc(tlen * sizeof *rev_rpos)) == NULL)
    Die("malloc failed");

  /* read the old arrays from the back, fill the new ones from the front */
  dst = 0;
  for (src = tlen - 1; src >= 0; src--)
    {
      rev_node[dst] = tr->nodeidx[src];
      rev_type[dst] = tr->statetype[src];
      rev_rpos[dst] = tr->rpos[src];
      dst++;
    }

  /* drop the old storage */
  free(tr->nodeidx);
  free(tr->statetype);
  free(tr->rpos);

  /* install the reversed arrays and the true length */
  tr->nodeidx   = rev_node;
  tr->statetype = rev_type;
  tr->rpos      = rev_rpos;
  tr->tlen      = tlen;
}
/* Function: PrintTrace()
 *
 * Purpose:  Debugging aid. Writes the traceback to stdout as a header
 *           line giving its length, followed by three labelled rows:
 *           node indexes, state types (printed as integers) and
 *           sequence positions.
 */
void
PrintTrace(struct trace_s *tr)
{
  int i;

  printf("=== Traceback === (length %d)\n", tr->tlen);

  fputs("Node indexes: ", stdout);
  i = 0;
  while (i < tr->tlen)
    {
      printf("%2d ", tr->nodeidx[i]);
      i++;
    }

  fputs("\nState types: ", stdout);
  i = 0;
  while (i < tr->tlen)
    {
      printf("%2d ", tr->statetype[i]);
      i++;
    }

  fputs("\nSeq positions: ", stdout);
  i = 0;
  while (i < tr->tlen)
    {
      printf("%2d ", tr->rpos[i]);
      i++;
    }

  /* terminate the last row */
  puts("");
}
/* Function: TraceCount()
 *
 * Purpose:  Add one aligned sequence to a counts-based HMM. For each
 *           trace position, the emitted symbol (if any) is counted
 *           via CountSymbol() and the transition to the following
 *           position is added to the current state's t[] vector.
 *           (Usually part of model parameter re-estimation.)
 *
 * Args:     hmm - counts-based HMM, updated in place
 *           seq - the sequence that the traceback aligns to the HMM (0..L-1)
 *           wt  - weight on the sequence; added for every count
 *           tr  - alignment of seq to HMM
 *
 * Return:   (void). A state type other than MATCH, INSERT or DELETE
 *           on either side of a transition is reported with Die().
 */
void
TraceCount(struct hmm_struc *hmm, char *seq, float wt, struct trace_s *tr)
{
  int tpos;                     /* position in tr */

  for (tpos = 0; tpos < tr->tlen; tpos++)
    {
      int k      = tr->nodeidx[tpos];    /* model node at this position  */
      int cur    = tr->statetype[tpos];  /* state type at this position  */
      int seqidx = tr->rpos[tpos];       /* symbol position in seq       */

      /* Emission counts: match states only for nodes 1..M,
       * insert states for whatever node they sit on.
       */
      if (cur == MATCH)
        {
          if (k > 0 && k <= hmm->M)
            CountSymbol(seq[seqidx], wt, hmm->mat[k].p);
        }
      else if (cur == INSERT)
        CountSymbol(seq[seqidx], wt, hmm->ins[k].p);

      /* State transition counts: the last position has no successor.
       * The slot incremented in t[] is the type of the next state.
       */
      if (tpos < tr->tlen - 1)
        {
          int next       = tr->statetype[tpos+1];
          int cur_known  = (cur  == MATCH || cur  == INSERT || cur  == DELETE);
          int next_known = (next == MATCH || next == INSERT || next == DELETE);

          if (!cur_known)
            Die("Unrecognized statetype %d", cur);
          else if (!next_known)
            Die("unrecognized statetype %d", next);
          else if (cur == MATCH)
            hmm->mat[k].t[next] += wt;
          else if (cur == INSERT)
            hmm->ins[k].t[next] += wt;
          else
            hmm->del[k].t[next] += wt;
        }
    }
}
/* Function: TraceScore()
 *
 * Purpose:  Calculate a score from a traceback. Used for the emit.c
 *           functions, and eventually maxmodelmaker.c.
 *
 *           The score is the sum, over the trace, of the transition
 *           score from the previous state into each state plus the
 *           emission score of every interior match state (nodes 1..M)
 *           and every insert state. It is accumulated as an integer
 *           and divided by INTSCALE at the end.
 *
 * Args:     shmm      - search form HMM structure
 *           seq       - sequence 0..len-1; symbols are letters, mapped
 *                       case-insensitively to an index counted from 'A'
 *           tr        - traceback that aligns seq to hmm
 *           ret_score - RETURN: the score of that alignment
 *
 * Return:   Always 1. An unrecognized state type in the trace is
 *           reported through Die() rather than by returning 0.
 */
int
TraceScore(struct shmm_s *shmm,
           char *seq,
           struct trace_s *tr,
           float *ret_score)
{
  int pos;                      /* position in seq */
  int sym;                      /* symbol index into the emission tables */
  int spos;                     /* position in state sequence */
  int score;
  int k;                        /* model node index */

  score = 0;
  for (spos = 0; spos < tr->tlen; spos++)
    {
      /* Transition cost: from the state at spos-1 into the state at spos.
       * Transition scores are laid out nine per node in shmm->t, selected
       * by the Txx offsets. Diagnostics report the offending state type,
       * not the trace position.
       */
      if (spos > 0) {
        k = tr->nodeidx[spos-1];
        switch (tr->statetype[spos-1]) {
        case MATCH:
          switch (tr->statetype[spos]) {
          case MATCH:  score += shmm->t[k*9 + Tmm]; break;
          case DELETE: score += shmm->t[k*9 + Tmd]; break;
          case INSERT: score += shmm->t[k*9 + Tmi]; break;
          default: Die("unrecognized statetype %d\n", tr->statetype[spos]);
          }
          break;
        case DELETE:
          switch (tr->statetype[spos]) {
          case MATCH:  score += shmm->t[k*9 + Tdm]; break;
          case DELETE: score += shmm->t[k*9 + Tdd]; break;
          case INSERT: score += shmm->t[k*9 + Tdi]; break;
          default: Die("unrecognized statetype %d\n", tr->statetype[spos]);
          }
          break;
        case INSERT:
          switch (tr->statetype[spos]) {
          case MATCH:  score += shmm->t[k*9 + Tim]; break;
          case DELETE: score += shmm->t[k*9 + Tid]; break;
          case INSERT: score += shmm->t[k*9 + Tii]; break;
          default: Die("unrecognized statetype %d\n", tr->statetype[spos]);
          }
          break;
        default: Die("unrecognized statetype %d\n", tr->statetype[spos-1]);
        }
      }

      /* Emission cost: only interior match states and insert states
       * emit, so seq is read only for those.
       */
      k = tr->nodeidx[spos];
      if ((tr->statetype[spos] == MATCH && k > 0 && k <= shmm->M) ||
          tr->statetype[spos] == INSERT)
        {
          pos = tr->rpos[spos];
          /* <ctype.h> functions require a value representable as
           * unsigned char; a plain char may be negative.
           */
          sym = isupper((unsigned char) seq[pos]) ? seq[pos] - 'A'
                                                  : seq[pos] - 'a';
          if (tr->statetype[spos] == MATCH)
            score += shmm->m_emit[sym][k];
          else
            score += shmm->i_emit[sym][k];
        }
    }

  /* NOTE(review): INTSCALE is defined outside this file; if it is an
   * integer constant this division truncates -- confirm it is floating.
   */
  *ret_score = (float) (score / INTSCALE);
  return 1;
}
/* Function: DealignTrace()
 *
 * Purpose:  Convert a traceback whose rpos entries index columns of an
 *           aligned sequence (such as the fake tracebacks produced by
 *           Maxmodelmaker()) into one whose rpos entries index the
 *           raw, gap-free sequence.
 *
 *           Entries equal to -1 are left as they are; the original
 *           author notes the routine is robust against S/W (local
 *           alignment) traces.
 *
 * Args:     tr   - traceback to dealign (rpos is rewritten in place)
 *           aseq - aligned sequence corresponding to traceback
 *           alen - length of aseq
 */
void
DealignTrace(struct trace_s *tr, char *aseq, int alen)
{
  int *raw_of;                  /* raw-sequence index per aligned column, -1 at gaps */
  int  col;                     /* column in aseq */
  int  nraw;                    /* non-gap symbols seen so far */
  int  t;                       /* position in traceback */

  /* Build the aligned-column -> raw-position table.
   */
  raw_of = (int *) MallocOrDie (sizeof(int) * alen);
  nraw = 0;
  for (col = 0; col < alen; col++)
    {
      if (isgap(aseq[col]))
        raw_of[col] = -1;
      else
        raw_of[col] = nraw++;
    }

  /* Translate every trace position that refers to a column.
   */
  for (t = 0; t < tr->tlen; t++)
    {
      int acol = tr->rpos[t];

      if (acol == -1)
        continue;               /* no column attached to this state */
      tr->rpos[t] = raw_of[acol];
    }

  free(raw_of);
}