-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #64 from andrewjpage/master
Updates to coords other minor bugs
- Loading branch information
Showing
18 changed files
with
227 additions
and
197 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
/* The MIT License | ||
Copyright (c) 2008 Genome Research Ltd (GRL). | ||
Copyright (c) 2008, 2009, 2011 Attractive Chaos <[email protected]> | ||
Permission is hereby granted, free of charge, to any person obtaining | ||
a copy of this software and associated documentation files (the | ||
|
@@ -23,9 +23,7 @@ | |
SOFTWARE. | ||
*/ | ||
|
||
/* Contact: Heng Li <[email protected]> */ | ||
|
||
/* Last Modified: 12APR2009 */ | ||
/* Last Modified: 05MAR2012 */ | ||
|
||
#ifndef AC_KSEQ_H | ||
#define AC_KSEQ_H | ||
|
@@ -36,11 +34,12 @@ | |
|
||
#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r | ||
#define KS_SEP_TAB 1 // isspace() && !' ' | ||
#define KS_SEP_MAX 1 | ||
#define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows) | ||
#define KS_SEP_MAX 2 | ||
|
||
#define __KS_TYPE(type_t) \ | ||
typedef struct __kstream_t { \ | ||
char *buf; \ | ||
unsigned char *buf; \ | ||
int begin, end, is_eof; \ | ||
type_t f; \ | ||
} kstream_t; | ||
|
@@ -53,7 +52,7 @@ | |
{ \ | ||
kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ | ||
ks->f = f; \ | ||
ks->buf = (char*)malloc(__bufsize); \ | ||
ks->buf = (unsigned char*)malloc(__bufsize); \ | ||
return ks; \ | ||
} \ | ||
static inline void ks_destroy(kstream_t *ks) \ | ||
|
@@ -90,10 +89,10 @@ typedef struct __kstring_t { | |
#endif | ||
|
||
#define __KS_GETUNTIL(__read, __bufsize) \ | ||
static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \ | ||
static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \ | ||
{ \ | ||
if (dret) *dret = 0; \ | ||
str->l = 0; \ | ||
str->l = append? str->l : 0; \ | ||
if (ks->begin >= ks->end && ks->is_eof) return -1; \ | ||
for (;;) { \ | ||
int i; \ | ||
|
@@ -105,7 +104,10 @@ typedef struct __kstring_t { | |
if (ks->end == 0) break; \ | ||
} else break; \ | ||
} \ | ||
if (delimiter > KS_SEP_MAX) { \ | ||
if (delimiter == KS_SEP_LINE) { \ | ||
for (i = ks->begin; i < ks->end; ++i) \ | ||
if (ks->buf[i] == '\n') break; \ | ||
} else if (delimiter > KS_SEP_MAX) { \ | ||
for (i = ks->begin; i < ks->end; ++i) \ | ||
if (ks->buf[i] == delimiter) break; \ | ||
} else if (delimiter == KS_SEP_SPACE) { \ | ||
|
@@ -115,7 +117,7 @@ typedef struct __kstring_t { | |
for (i = ks->begin; i < ks->end; ++i) \ | ||
if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \ | ||
} else i = 0; /* never come to here! */ \ | ||
if (str->m - str->l < i - ks->begin + 1) { \ | ||
if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \ | ||
str->m = str->l + (i - ks->begin) + 1; \ | ||
kroundup32(str->m); \ | ||
str->s = (char*)realloc(str->s, str->m); \ | ||
|
@@ -128,33 +130,32 @@ typedef struct __kstring_t { | |
break; \ | ||
} \ | ||
} \ | ||
if (str->l == 0) { \ | ||
if (str->s == 0) { \ | ||
str->m = 1; \ | ||
str->s = (char*)calloc(1, 1); \ | ||
} \ | ||
} else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \ | ||
str->s[str->l] = '\0'; \ | ||
return str->l; \ | ||
} | ||
} \ | ||
static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \ | ||
{ return ks_getuntil2(ks, delimiter, str, dret, 0); } | ||
|
||
#define KSTREAM_INIT(type_t, __read, __bufsize) \ | ||
__KS_TYPE(type_t) \ | ||
__KS_BASIC(type_t, __bufsize) \ | ||
__KS_GETC(__read, __bufsize) \ | ||
__KS_GETUNTIL(__read, __bufsize) | ||
|
||
#define __KSEQ_BASIC(type_t) \ | ||
static inline kseq_t *kseq_init(type_t fd) \ | ||
#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0) | ||
|
||
#define __KSEQ_BASIC(SCOPE, type_t) \ | ||
SCOPE kseq_t *kseq_init(type_t fd) \ | ||
{ \ | ||
kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \ | ||
s->f = ks_init(fd); \ | ||
return s; \ | ||
} \ | ||
static inline void kseq_rewind(kseq_t *ks) \ | ||
{ \ | ||
ks->last_char = 0; \ | ||
ks->f->is_eof = ks->f->begin = ks->f->end = 0; \ | ||
} \ | ||
static inline void kseq_destroy(kseq_t *ks) \ | ||
SCOPE void kseq_destroy(kseq_t *ks) \ | ||
{ \ | ||
if (!ks) return; \ | ||
free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \ | ||
|
@@ -167,44 +168,46 @@ typedef struct __kstring_t { | |
-1 end-of-file | ||
-2 truncated quality string | ||
*/ | ||
#define __KSEQ_READ \ | ||
static int kseq_read(kseq_t *seq) \ | ||
{ \ | ||
int c; \ | ||
kstream_t *ks = seq->f; \ | ||
#define __KSEQ_READ(SCOPE) \ | ||
SCOPE int kseq_read(kseq_t *seq) \ | ||
{ \ | ||
int c; \ | ||
kstream_t *ks = seq->f; \ | ||
if (seq->last_char == 0) { /* then jump to the next header line */ \ | ||
while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \ | ||
if (c == -1) return -1; /* end of file */ \ | ||
seq->last_char = c; \ | ||
} /* the first header char has been read */ \ | ||
seq->comment.l = seq->seq.l = seq->qual.l = 0; \ | ||
if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \ | ||
if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \ | ||
while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \ | ||
if (c == -1) return -1; /* end of file */ \ | ||
seq->last_char = c; \ | ||
} /* else: the first header char has been read in the previous call */ \ | ||
seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \ | ||
if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \ | ||
if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \ | ||
if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \ | ||
seq->seq.m = 256; \ | ||
seq->seq.s = (char*)malloc(seq->seq.m); \ | ||
} \ | ||
while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \ | ||
if (isgraph(c)) { /* printable non-space character */ \ | ||
if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \ | ||
seq->seq.m = seq->seq.l + 2; \ | ||
kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \ | ||
seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \ | ||
} \ | ||
seq->seq.s[seq->seq.l++] = (char)c; \ | ||
} \ | ||
} \ | ||
if (c == '\n') continue; /* skip empty lines */ \ | ||
seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \ | ||
ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \ | ||
} \ | ||
if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \ | ||
seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \ | ||
if (c != '+') return seq->seq.l; /* FASTA */ \ | ||
if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \ | ||
seq->qual.m = seq->seq.m; \ | ||
seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \ | ||
} \ | ||
if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \ | ||
seq->seq.m = seq->seq.l + 2; \ | ||
kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \ | ||
seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \ | ||
} \ | ||
seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \ | ||
if (c != '+') return seq->seq.l; /* FASTA */ \ | ||
if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \ | ||
seq->qual.m = seq->seq.m; \ | ||
seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \ | ||
} \ | ||
while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \ | ||
if (c == -1) return -2; /* we should not stop here */ \ | ||
while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \ | ||
if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \ | ||
seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \ | ||
if (c == -1) return -2; /* error: no quality string */ \ | ||
while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \ | ||
seq->last_char = 0; /* we have not come to the next header line */ \ | ||
if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \ | ||
return seq->seq.l; \ | ||
if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \ | ||
return seq->seq.l; \ | ||
} | ||
|
||
#define __KSEQ_TYPE(type_t) \ | ||
|
@@ -214,10 +217,19 @@ typedef struct __kstring_t { | |
kstream_t *f; \ | ||
} kseq_t; | ||
|
||
#define KSEQ_INIT(type_t, __read) \ | ||
KSTREAM_INIT(type_t, __read, 1048576) \ | ||
#define KSEQ_INIT2(SCOPE, type_t, __read) \ | ||
KSTREAM_INIT(type_t, __read, 16384) \ | ||
__KSEQ_TYPE(type_t) \ | ||
__KSEQ_BASIC(type_t) \ | ||
__KSEQ_READ | ||
__KSEQ_BASIC(SCOPE, type_t) \ | ||
__KSEQ_READ(SCOPE) | ||
|
||
#endif | ||
#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read) | ||
|
||
#define KSEQ_DECLARE(type_t) \ | ||
__KS_TYPE(type_t) \ | ||
__KSEQ_TYPE(type_t) \ | ||
extern kseq_t *kseq_init(type_t fd); \ | ||
void kseq_destroy(kseq_t *ks); \ | ||
int kseq_read(kseq_t *seq); | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.