forked from EddyRivasLab/easel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheasel.h
524 lines (459 loc) · 20.9 KB
/
easel.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
/* Easel's foundation.
*
* Core functionality of easel: errors, memory allocations, constants,
* and configuration for portability.
*
* Contents:
* 1. Macros implementing Easel error handling conventions
* 2. Macros implementing Easel memory allocation conventions
* 3. Macros implementing Easel function argument conventions
* 4. Macros implementing Easel debugging output conventions
* 5. Defined constants
* 6. Basic support for Easel digitized biosequences
* 7. Using optional compiler attributes
* 8. Miscellaneous
* 9. API declarations of easel.c
*/
#ifndef eslEASEL_INCLUDED
#define eslEASEL_INCLUDED
#include "esl_config.h"
#include <stdlib.h>
#include <stdio.h> /* for FILE */
#include <stdarg.h> /* for va_list */
#include <math.h> /* for HUGE_VAL */
#ifdef HAVE_STDINT_H
#include <stdint.h> /* for uint32_t and the like (C99) */
#endif
#ifdef HAVE_INTTYPES_H
#include <inttypes.h> /* some systems allegedly put uints here */
#endif
/*****************************************************************
* 1. Macros implementing Easel's error handling conventions
*****************************************************************/
/* Many objects contain a fixed length "errbuf" for failure
* diagnostics: ESL_FAIL() and ESL_XFAIL() fill this buffer.
*/
#define eslERRBUFSIZE 128
/* ESL_FAIL() - return an error message, without cleanup.
* ESL_XFAIL() - return an error message, with cleanup.
* ESL_EXCEPTION() - throwing an exception, without cleanup.
* ESL_XEXCEPTION() - throwing an exception, with cleanup.
*
* The X versions (with cleanup) require the caller to have an
* <int status> variable and a <ERROR:> goto target in scope,
* which, yes, is a little hacky.
*
* Wrapping these macros in <while(0)> loops allows a statement:
* if (something) ESL_XEXCEPTION(code,mesg);
* without the trailing semicolon becoming a null statement after
* macro expansion.
*
* All esl_fail() does is vsnprintf() to the <errbuf>; the reason to
* have ESL_FAIL and ESL_XFAIL call esl_fail() is to enable us to set
* a debugging breakpoint in esl_fail(), so we can break execution at
* a normal failure.
*/
/*::cexcerpt::error_macros::begin::*/
#define ESL_FAIL(code, errbuf, ...) do { \
esl_fail(errbuf, __VA_ARGS__); \
return code; } \
while (0)
#define ESL_XFAIL(code, errbuf, ...) do { \
status = code; \
esl_fail(errbuf, __VA_ARGS__); \
goto ERROR; } \
while (0)
#define ESL_EXCEPTION(code, ...) do { \
esl_exception(code, FALSE, __FILE__, __LINE__, __VA_ARGS__); \
return code; } \
while (0)
#define ESL_XEXCEPTION(code, ...) do { \
status = code; \
esl_exception(code, FALSE, __FILE__, __LINE__, __VA_ARGS__); \
goto ERROR; } \
while (0)
#define ESL_EXCEPTION_SYS(code, ...) do { \
esl_exception(code, TRUE, __FILE__, __LINE__, __VA_ARGS__); \
return code; } \
while (0)
#define ESL_XEXCEPTION_SYS(code, ...) do { \
status = code; \
esl_exception(code, TRUE, __FILE__, __LINE__, __VA_ARGS__); \
goto ERROR; } \
while (0)
/*::cexcerpt::error_macros::end::*/
/* Return codes for error handler
*/
/*::cexcerpt::statuscodes::begin::*/
#define eslOK 0 /* no error/success */
#define eslFAIL 1 /* failure */
#define eslEOL 2 /* end-of-line (often normal) */
#define eslEOF 3 /* end-of-file (often normal) */
#define eslEOD 4 /* end-of-data (often normal) */
#define eslEMEM 5 /* malloc or realloc failed */
#define eslENOTFOUND 6 /* file or key not found */
#define eslEFORMAT 7 /* file format not correct */
#define eslEAMBIGUOUS 8 /* an ambiguity of some sort */
#define eslEDIVZERO 9 /* attempted div by zero */
#define eslEINCOMPAT 10 /* incompatible parameters */
#define eslEINVAL 11 /* invalid argument/parameter */
#define eslESYS 12 /* generic system call failure */
#define eslECORRUPT 13 /* unexpected data corruption */
#define eslEINCONCEIVABLE 14 /* "can't happen" error */
#define eslESYNTAX 15 /* invalid user input syntax */
#define eslERANGE 16 /* value out of allowed range */
#define eslEDUP 17 /* saw a duplicate of something */
#define eslENOHALT 18 /* a failure to converge */
#define eslENORESULT 19 /* no result was obtained */
#define eslENODATA 20 /* no data provided, file empty */
#define eslETYPE 21 /* invalid type of argument */
#define eslEOVERWRITE 22 /* attempted to overwrite data */
#define eslENOSPACE 23 /* ran out of some resource */
#define eslEUNIMPLEMENTED 24 /* feature is unimplemented */
#define eslENOFORMAT 25 /* couldn't guess file format */
#define eslENOALPHABET 26 /* couldn't guess seq alphabet */
#define eslEWRITE 27 /* write failed (fprintf, etc) */
#define eslEINACCURATE 28 /* return val may be inaccurate */
/*::cexcerpt::statuscodes::end::*/
/*****************************************************************
* 2. Macros implementing Easel's memory allocation conventions
*****************************************************************/
/* ESL_ALLOC(), ESL_RALLOC():
*
* Allocation and reallocation wrappers.
* Both require <int status> in scope, and <ERROR:> goto target.
* ESL_RALLOC() also requires <void *> ptr to be provided as <tmp>.
*
* ESL_REALLOC() is a newer version of ESL_RALLOC() which doesn't
* need a tmp ptr. All ESL_RALLOC() calls can be safely converted
* to ESL_REALLOC() calls.
*
* The result of malloc(0) is implementation-defined (either NULL or
* a ptr that may not be dereferenced), a bit of a hole in the C
* standard. In Easel, we want to avoid having NULL as a valid
* non-error result of malloc(), because it confuses static analysis
* tools when they see dereferences of possibly NULL pointers. We
* therefore treat malloc(0) as an eslEMEM error.
*/
/*::cexcerpt::alloc_macros::begin::*/
#define ESL_ALLOC(p, size) do {\
if ( size <= 0 ) { \
p = NULL; \
status = eslEMEM; \
esl_exception(status, FALSE, __FILE__, __LINE__, "zero malloc disallowed"); \
goto ERROR;\
}\
if ( ((p) = malloc(size)) == NULL) { \
status = eslEMEM;\
esl_exception(status, FALSE, __FILE__, __LINE__, "malloc of size %d failed", size); \
goto ERROR;\
}} while (0)
#define ESL_RALLOC(p, tmp, newsize) do {\
if ((p) == NULL) { (tmp) = malloc(newsize); }\
else { (tmp) = realloc((p), (newsize)); }\
if ((tmp) != NULL) (p) = (tmp);\
else {\
status = eslEMEM;\
esl_exception(status, FALSE, __FILE__, __LINE__, "realloc for size %d failed", newsize); \
goto ERROR;\
}} while (0)
#define ESL_REALLOC(p, newsize) do {\
void *esltmpp;\
if ((p) == NULL) { (esltmpp) = malloc(newsize); }\
else { (esltmpp) = realloc((p), (newsize)); }\
if ((esltmpp) != NULL) (p) = (esltmpp);\
else {\
status = eslEMEM;\
esl_exception(status, FALSE, __FILE__, __LINE__, "realloc for size %d failed", newsize); \
goto ERROR;\
}} while (0)
/*::cexcerpt::alloc_macros::end::*/
/* Convert MiB,GiB,TiB to bytes, using binary definitions (2^20, 2^30, 2^40):
* Pedantically speaking, that's: mebibytes (MiB), gibibytes (GiB), tebibytes (TiB).
* 1 TB = 10^12 bytes; 1 TiB = 2^40 bytes.
*/
#define ESL_MBYTES(x) ((x) * 1048576)
#define ESL_GBYTES(x) ((x) * 1024 * 1048576)
#define ESL_TBYTES(x) ((x) * 1024 * 1024 * 1048576)
/* Round integer <n> up to the nearest multiple of <m>.
* Particularly useful when dealing w/ memory alignment issues.
*/
#define ESL_UPROUND(n, m) ( ((n) + (m)-1) / (m) * (m))
/*****************************************************************
* 3. Macros implementing Easel's function argument conventions
*****************************************************************/
#define esl_byp_IsInternal(p) ((p) == NULL)
#define esl_byp_IsReturned(p) ((p) != NULL && (*p) == NULL)
#define esl_byp_IsProvided(p) ((p) != NULL && (*p) != NULL)
/* Sometimes a shared function API dictates arguments that a function
* doesn't use, and we want to silence compiler warnings about this.
* Putting ESL_UNUSED(x) in the function, for an unused argument <x>,
* satisfies the compiler while generating a no-op.
*/
#define ESL_UNUSED(x) (void)(sizeof((x)))
/* Sometimes we need a macro's value as a string constant
* instead of a number. For example we might have
* #define eslFOO_DEFAULT 42
* and we want to use that default value in an esl_getopts OPTIONS
* array, where the default needs to be specified as a string constant
* "42" not a number 42. We can use a C preprocessor trick for this,
* using the stringize `#` preprocessor operator. It requires two
* steps: one to expand the macro, another to convert the expanded
* value to a string constant. You call ESL_STR(eslFOO_DEFAULT). The
* ESL_XSTR() macro is just to make the trick work.
*/
#define ESL_XSTR(x) #x
#define ESL_STR(x) ESL_XSTR(x)
/*****************************************************************
* 4. Macros implementing Easel's debugging output conventions
*****************************************************************/
/* Debugging hooks, w/ three levels (1-3).
*/
#if eslDEBUGLEVEL >= 1 /* for ESL_DASSERT() macros */
#include <assert.h>
#endif
#if (eslDEBUGLEVEL >= 1)
#define ESL_DPRINTF1(x) printf x
#define ESL_DASSERT1(x) assert x
#else
#define ESL_DPRINTF1(x)
#define ESL_DASSERT1(x)
#endif
#if (eslDEBUGLEVEL >= 2)
#define ESL_DPRINTF2(x) printf x
#define ESL_DASSERT2(x) assert x
#else
#define ESL_DPRINTF2(x)
#define ESL_DASSERT2(x)
#endif
#if (eslDEBUGLEVEL >= 3)
#define ESL_DPRINTF3(x) printf x
#define ESL_DASSERT3(x) assert x
#else
#define ESL_DPRINTF3(x)
#define ESL_DASSERT3(x)
#endif
/*****************************************************************
* 5. Defined constants
*****************************************************************/
/* Making sure TRUE/FALSE are defined, for convenience */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/* Some basic mathematical constants.
* Assuming IEEE754 math with 64-bit doubles (53-bit mantissas), we
* want 17 significant decimal digits in our constants. More is
* a waste (but we do it for some anyway).
*/
#define eslCONST_E 2.71828182845904523536028747135
#define eslCONST_PI 3.14159265358979323846264338328
#define eslCONST_EULER 0.57721566490153286060651209008
#define eslCONST_GOLD 1.61803398874989484820458683437
#define eslCONST_LOG2 0.69314718055994529
#define eslCONST_LOG2R 1.44269504088896341
/* Define <eslINFINITY>, <eslNaN> portably.
* ANSI C99 makes it easy (at last!). The failovers to other pre-C99
* methods are legacy now that we require a C99 compiler - but no harm
* leaving them in Just In Case.
*/
#if defined (INFINITY)
#define eslINFINITY INFINITY /* C99 */
#elif defined (HUGE_VAL)
#define eslINFINITY HUGE_VAL /* assume IEEE754 HUGE_VAL = infinity. ok? */
#else
#define eslINFINITY (1.0/0.0) /* portable? */
#endif
#if defined (NAN)
#define eslNaN NAN /* C99 */
#else
#define eslNaN (eslINFINITY/eslINFINITY) /* portably make a IEEE754 NaN */
#endif
/* Define crossovers for numerical approximations.
*/
/* log(1+x) ~ x and 1-e^x = -x approximation.
* Same threshold appears to be optimal for float or double x. xref STL9/138.
*/
#define eslSMALLX1 5e-9
/*****************************************************************
* 6. Basic support for Easel's digitized biosequences.
*****************************************************************/
/* Most of this support is in the alphabet module, but we externalize
* some into the easel foundation because ESL_INMAP is used in
* sqio, msa modules.
*
* A digital sequence residue (ESL_DSQ) is an unsigned 8-bit type
* (0..255). A valid digital residue has a value in the range 0..127
* (Easel can represent alphabets of up to 128 different characters).
* Values 128..255 are reserved for flags.
*
* An "inmap" is ESL_DSQ[128], or *ESL_DSQ allocated for 128 values;
* it is a many-to-one construct for mapping 7-bit ASCII chars (in
* range 0..127) either to new ASCII chars (in the case of raw
* sequence input in sqio, msa) or to digital codes (in the alphabet
* module). Valid mapped values are 0..127; any value in range
* 128..255 is some kind of flag.
*/
typedef uint8_t ESL_DSQ;
#define eslDSQ_SENTINEL 255 /* sentinel bytes 0,L+1 in a dsq */
#define eslDSQ_ILLEGAL 254 /* input symbol is unmapped and unexpected */
#define eslDSQ_IGNORED 253 /* input symbol is unmapped and ignored */
#define eslDSQ_EOL 252 /* input symbol marks end of a line */
#define eslDSQ_EOD 251 /* input symbol marks end of a seq record */
/* If you try to test sym > 0 && sym <= 127 below, instead of isascii(sym),
* you'll get a compiler warning for an always-successful test regardless
* of whether a char is signed or unsigned. So we trust that isascii() is
* doing the Right Thing.
*/
#define esl_inmap_IsValid(inmap, sym) (isascii(sym) && (inmap)[(int)sym] <= 127)
/*****************************************************************
* 7. Using optional compiler attributes
*****************************************************************/
/* It's convenient to be able to use some optional features of
* gcc-like compilers, especially when developing, but we have to be
* able to make these features vanish when the compiler doesn't
* support them.
*/
/* ESL_ATTRIBUTE_NORETURN
* Static analyzers (like clang's) may need to be clued in when
* a function cannot return: fatal error handlers, for example.
* gcc-like compilers support the __attribute__((__noreturn__))
* extension on function declarations. Functions that don't return
* are declared like:
* extern void fatal(char *msg, ...) ESL_ATTRIBUTE_NORETURN;
*/
#ifdef HAVE_FUNC_ATTRIBUTE_NORETURN
#define ESL_ATTRIBUTE_NORETURN __attribute__((__noreturn__))
#else
#define ESL_ATTRIBUTE_NORETURN
#endif
/* ESL_ATTRIBUTE_FORMAT
* For printf()-like functions with a variable number of arguments
* corresponding to a format string, a compiler is generally unable
* to check that the args match the format. gcc-like compilers
* allow declaring a "format" attribute to enable typechecking of
* printf-like functions. The arguments are printf, <string_idx>,
* <first-to-check>. For example:
* extern void my_printf(FILE *fp, char *s, ...) ESL_ATTRIBUTE_FORMAT(printf, 2, 3);
*/
#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
#define ESL_ATTRIBUTE_FORMAT(type,idx,first) __attribute__((format(type,idx,first)))
#else
#define ESL_ATTRIBUTE_FORMAT(type,idx,first)
#endif
/*****************************************************************
* 8. Miscellaneous.
*****************************************************************/
/* A placeholder for helping w/ portability of filenames/paths.
* I think, but have not tested, that:
* VMS: #define DIRSLASH ']'
* ancient MacOS: #define DIRSLASH ':'
* DOS: #define DIRSLASH '\\'
* Setting DIRSLASH correctly is probably not the only thing
* that would need to be done to port to other OS's, but it's
* probably a start.
*
* The code assumes that '.' is used for file name extensions,
* such as "foo.bar".
*
* This gets used in easel.c's *_File*() functions.
*/
#define eslDIRSLASH '/' /* UNIX directory paths have /foo/bar */
/* Some generic macros for swapping, min, and max.
*/
#define ESL_SWAP(x, y, type) do { type esltmpxyz = (x); (x) = (y); (y) = esltmpxyz; } while (0)
#define ESL_MIN(a,b) (((a)<(b))?(a):(b))
#define ESL_MAX(a,b) (((a)>(b))?(a):(b))
static inline float esl_log (double x) { return (x == 0.0 ? -eslINFINITY : log(x)); } /* avoid fp exceptions; log(0) = -inf is fine */
static inline float esl_log2 (double x) { return (x == 0.0 ? -eslINFINITY : log2(x)); }
static inline float esl_logf (float x) { return (x == 0.0 ? -eslINFINITY : logf(x)); }
static inline float esl_log2f(float x) { return (x == 0.0 ? -eslINFINITY : log2f(x)); }
/* Typedef: <esl_pos_t>
*
* <esl_pos_t> is a signed integer type suitable for safe casting
* to EITHER an <off_t> or <size_t> on this system (i.e. as a position
* in memory or in a file on disk), where we may use a -1 as a flag
* (or even other negative numbers).
*
* <esl_pos_t> is for use for anything having to do with positions in
* large buffers, strings, or files, where we want strict control
* of integer range limits.
*
* Note that POSIX requires size_t to be unsigned, and off_t to be
* signed.
*/
typedef int64_t esl_pos_t;
/*****************************************************************
* 9. The API declarations for easel.c
*****************************************************************/
/* 1. Error handling. */
typedef void (*esl_exception_handler_f)(int errcode, int use_errno, char *sourcefile, int sourceline, char *format, va_list argp);
extern void esl_fail(char *errbuf, const char *format, ...);
extern void esl_exception(int errcode, int use_errno, char *sourcefile, int sourceline, char *format, ...);
extern void esl_exception_SetHandler(esl_exception_handler_f);
extern void esl_exception_ResetDefaultHandler(void);
extern void esl_nonfatal_handler(int errcode, int use_errno, char *sourcefile, int sourceline, char *format, va_list argp);
extern void esl_fatal(const char *format, ...) ESL_ATTRIBUTE_NORETURN;
/* 2. Memory allocation/deallocation conventions. */
extern void esl_free(void *p);
extern void esl_Free2D(void **p, int dim1);
extern void esl_Free3D(void ***p, int dim1, int dim2);
/* 3. Standard banner for Easel miniapplications. */
extern int esl_banner (FILE *fp, const char *progname, char *banner);
extern int esl_usage (FILE *fp, const char *progname, char *usage);
extern int esl_dataheader(FILE *fp, ...);
/* 4. Improved replacements for some C library functions */
extern int esl_fgets(char **buf, int *n, FILE *fp);
extern int esl_fprintf(FILE *fp, const char *format, ...);
extern int esl_printf(const char *format, ...);
extern int esl_strdup(const char *s, int64_t n, char **ret_dup);
extern int esl_strcat(char **dest, int64_t ldest, const char *src, int64_t lsrc);
extern int esl_strmapcat (const ESL_DSQ *inmap, char **dest, int64_t *ldest, const char *src, esl_pos_t lsrc);
extern int esl_strmapcat_noalloc(const ESL_DSQ *inmap, char *dest, int64_t *ldest, const char *src, esl_pos_t lsrc);
extern int esl_strtok (char **s, char *delim, char **ret_tok);
extern int esl_strtok_adv(char **s, char *delim, char **ret_tok, int *opt_toklen, char *opt_endchar);
extern int esl_sprintf (char **ret_s, const char *format, ...);
extern int esl_vsprintf(char **ret_s, const char *format, va_list *ap);
extern int esl_strcmp(const char *s1, const char *s2);
/* 5. Portable drop-in replacements for non-standard C functions */
#ifndef HAVE_STRCASECMP
#ifdef _MSC_VER
#define strcasecmp stricmp
#else
extern int esl_strcasecmp(const char *s1, const char *s2);
#define strcasecmp esl_strcasecmp
#endif
#endif
#ifndef HAVE_STRSEP
extern char *esl_strsep(char **stringp, const char *delim);
#define strsep esl_strsep
#endif
/* 6. Additional string functions, esl_str*() */
extern int esl_strchop(char *s, int64_t n);
extern int esl_strdealign(char *s, const char *aseq, const char *gapchars, int64_t *opt_rlen);
extern int esl_str_IsBlank(char *s);
extern int esl_str_IsInteger(char *s);
extern int esl_str_IsReal(char *s);
extern int64_t esl_str_GetMaxWidth(char **s, int n);
/* 7. File path/name manipulation functions, including tmpfiles */
extern int esl_FileExists(const char *filename);
extern int esl_FileTail(const char *path, int nosuffix, char **ret_file);
extern int esl_file_Extension(char *filename, esl_pos_t n_ignore, char **ret_sfx, esl_pos_t *ret_n);
extern int esl_FileConcat(const char *dir, const char *file, char **ret_path);
extern int esl_FileNewSuffix(const char *filename, const char *sfx, char **ret_newpath);
extern int esl_FileEnvOpen(const char *fname, const char *env,
FILE **ret_fp, char **ret_path);
extern int esl_tmpfile(char *basename6X, FILE **ret_fp);
extern int esl_tmpfile_named(char *basename6X, FILE **ret_fp);
extern int esl_getcwd(char **ret_cwd);
/* 8. Typed comparison routines. */
extern int esl_DCompare (double a, double b, double tol);
extern int esl_FCompare (float a, float b, float tol);
extern int esl_DCompareAbs(double a, double b, double tol);
extern int esl_FCompareAbs(float a, float b, float tol);
extern int esl_DCompareNew(double x0, double x, double r_tol, double a_tol);
extern int esl_FCompareNew(float x0, float x, float r_tol, float a_tol);
extern int esl_CCompare(char *s1, char *s2);
#endif /*eslEASEL_INCLUDED*/