Skip to content

Commit

Permalink
Merge pull request #64 from andrewjpage/master
Browse files Browse the repository at this point in the history
Updates to coords other minor bugs
  • Loading branch information
andrewjpage committed Mar 8, 2013
2 parents bb287fe + 8e525ec commit 9f5be3a
Show file tree
Hide file tree
Showing 18 changed files with 227 additions and 197 deletions.
16 changes: 12 additions & 4 deletions src/block_tab_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,19 @@
#include <string.h>
#include "block_tab_file.h"

void print_block_details(FILE * block_file_pointer, int start_coordinate, int end_coordinate, int number_of_snps, char * current_node_id, char * parent_node_id, char * taxon_names)
void print_block_details(FILE * block_file_pointer, int start_coordinate, int end_coordinate, int number_of_snps, char * current_node_id, char * parent_node_id, char * taxon_names, int number_of_child_nodes)
{
fprintf(block_file_pointer, "FT misc_feature %d..%d\n", start_coordinate+1, end_coordinate+1);
fprintf(block_file_pointer, "FT misc_feature %d..%d\n", start_coordinate, end_coordinate);
fprintf(block_file_pointer, "FT /node=\"%s->%s\"\n",parent_node_id,current_node_id);
fprintf(block_file_pointer, "FT /colour=2\n");

if(number_of_child_nodes > 0)
{
fprintf(block_file_pointer, "FT /colour=2\n");
}
else
{
fprintf(block_file_pointer, "FT /colour=4\n");
}
fprintf(block_file_pointer, "FT /taxa=\"%s\"\n",taxon_names);
fprintf(block_file_pointer, "FT /SNP_count=%d\n",number_of_snps);
fflush(block_file_pointer);
Expand All @@ -38,7 +46,7 @@ void print_branch_snp_details(FILE * branch_snps_file_pointer, char * current_no
int i = 0;
for(i=0; i< number_of_branch_snps; i++)
{
fprintf(branch_snps_file_pointer, "FT variation %d\n", branches_snp_sites[i]+1);
fprintf(branch_snps_file_pointer, "FT variation %d\n", branches_snp_sites[i]);
fprintf(branch_snps_file_pointer, "FT /node=\"%s->%s\"\n",parent_node_id,current_node_id);
fprintf(branch_snps_file_pointer, "FT /colour=4\n");
fprintf(branch_snps_file_pointer, "FT /taxa=\"%s\"\n",taxon_names);
Expand Down
2 changes: 1 addition & 1 deletion src/block_tab_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@
#ifndef _BLOCK_TAB_FILE_H_
#define _BLOCK_TAB_FILE_H_

void print_block_details(FILE * block_file_pointer, int start_coordinate, int end_coordinate, int number_of_snps, char * current_node_id, char * parent_node_id, char * taxon_names);
void print_block_details(FILE * block_file_pointer, int start_coordinate, int end_coordinate, int number_of_snps, char * current_node_id, char * parent_node_id, char * taxon_names,int number_of_child_nodes);
void print_branch_snp_details(FILE * branch_snps_file_pointer, char * current_node_id, char * parent_node_id, int * branches_snp_sites, int number_of_branch_snps, char * branch_snp_sequence, char * branch_snp_ancestor_sequence,char * taxon_names);
#endif
8 changes: 4 additions & 4 deletions src/branch_sequences.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ void carry_unambiguous_gaps_up_tree(newick_node *root)
int parent_sequence_index = find_sequence_index_from_sample_name(root->taxon);

child = root->child;
int child_sequence_indices[number_of_snps_in_phylip()];
int child_sequence_indices[root->childNum];
int child_counter = 0;
while (child != NULL)
{
Expand Down Expand Up @@ -387,8 +387,8 @@ void get_likelihood_for_windows(char * child_sequence, int length_of_sequence, i
}

int current_window_start_coordinate = window_start_coordinate;
// Minimum statistically significant window size divided by 2 (Nyquist).
window_start_coordinate += (int) MIN_WINDOW_SIZE/2;
// Window size divided by 2 (Nyquist).
window_start_coordinate += (int) window_size/2;
// Move to next snp, more efficient but then the adjacent block check doesnt work.
//window_start_coordinate = advance_window_start_to_next_snp(window_start_coordinate, snp_site_coords, child_sequence, number_of_branch_snps);s

Expand Down Expand Up @@ -536,7 +536,7 @@ int flag_smallest_log_likelihood_recombinations(int ** candidate_blocks, int num
//current_node->recombinations = realloc(current_node->recombinations, number_of_recombinations*sizeof(int));
current_node->num_recombinations = number_of_recombinations;

print_block_details(block_file_pointer, candidate_blocks[0][smallest_index], candidate_blocks[1][smallest_index], number_of_recombinations_in_window, current_node->taxon, root->taxon, current_node->taxon_names);
print_block_details(block_file_pointer, candidate_blocks[0][smallest_index], candidate_blocks[1][smallest_index], number_of_recombinations_in_window, current_node->taxon, root->taxon, current_node->taxon_names, current_node->childNum);
print_gff_line(gff_file_pointer, candidate_blocks[0][smallest_index], candidate_blocks[1][smallest_index], number_of_recombinations_in_window, current_node->taxon, root->taxon, current_node->taxon_names);
current_node->number_of_blocks = current_node->number_of_blocks + 1;

Expand Down
2 changes: 1 addition & 1 deletion src/gubbins.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fast

filter_sequence_bases_and_rotate(reference_sequence_bases, filtered_bases_for_snps, number_of_filtered_snps);
create_phylip_of_snp_sites(tree_filename, number_of_filtered_snps, filtered_bases_for_snps, sample_names, number_of_samples,internal_nodes);
create_vcf_file(tree_filename, filtered_snp_locations, number_of_filtered_snps, filtered_bases_for_snps, sample_names, number_of_samples,internal_nodes);
create_vcf_file(tree_filename, filtered_snp_locations, number_of_filtered_snps, filtered_bases_for_snps, sample_names, number_of_samples,internal_nodes,0);
create_fasta_of_snp_sites(tree_filename, number_of_filtered_snps, filtered_bases_for_snps, sample_names, number_of_samples,internal_nodes);

// Create an new tree with updated distances
Expand Down
132 changes: 72 additions & 60 deletions src/kseq.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* The MIT License
Copyright (c) 2008 Genome Research Ltd (GRL).
Copyright (c) 2008, 2009, 2011 Attractive Chaos <[email protected]>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
Expand All @@ -23,9 +23,7 @@
SOFTWARE.
*/

/* Contact: Heng Li <[email protected]> */

/* Last Modified: 12APR2009 */
/* Last Modified: 05MAR2012 */

#ifndef AC_KSEQ_H
#define AC_KSEQ_H
Expand All @@ -36,11 +34,12 @@

#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
#define KS_SEP_TAB 1 // isspace() && !' '
#define KS_SEP_MAX 1
#define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows)
#define KS_SEP_MAX 2

#define __KS_TYPE(type_t) \
typedef struct __kstream_t { \
char *buf; \
unsigned char *buf; \
int begin, end, is_eof; \
type_t f; \
} kstream_t;
Expand All @@ -53,7 +52,7 @@
{ \
kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
ks->f = f; \
ks->buf = (char*)malloc(__bufsize); \
ks->buf = (unsigned char*)malloc(__bufsize); \
return ks; \
} \
static inline void ks_destroy(kstream_t *ks) \
Expand Down Expand Up @@ -90,10 +89,10 @@ typedef struct __kstring_t {
#endif

#define __KS_GETUNTIL(__read, __bufsize) \
static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
{ \
if (dret) *dret = 0; \
str->l = 0; \
str->l = append? str->l : 0; \
if (ks->begin >= ks->end && ks->is_eof) return -1; \
for (;;) { \
int i; \
Expand All @@ -105,7 +104,10 @@ typedef struct __kstring_t {
if (ks->end == 0) break; \
} else break; \
} \
if (delimiter > KS_SEP_MAX) { \
if (delimiter == KS_SEP_LINE) { \
for (i = ks->begin; i < ks->end; ++i) \
if (ks->buf[i] == '\n') break; \
} else if (delimiter > KS_SEP_MAX) { \
for (i = ks->begin; i < ks->end; ++i) \
if (ks->buf[i] == delimiter) break; \
} else if (delimiter == KS_SEP_SPACE) { \
Expand All @@ -115,7 +117,7 @@ typedef struct __kstring_t {
for (i = ks->begin; i < ks->end; ++i) \
if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
} else i = 0; /* never come to here! */ \
if (str->m - str->l < i - ks->begin + 1) { \
if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
str->m = str->l + (i - ks->begin) + 1; \
kroundup32(str->m); \
str->s = (char*)realloc(str->s, str->m); \
Expand All @@ -128,33 +130,32 @@ typedef struct __kstring_t {
break; \
} \
} \
if (str->l == 0) { \
if (str->s == 0) { \
str->m = 1; \
str->s = (char*)calloc(1, 1); \
} \
} else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
str->s[str->l] = '\0'; \
return str->l; \
}
} \
static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
{ return ks_getuntil2(ks, delimiter, str, dret, 0); }

#define KSTREAM_INIT(type_t, __read, __bufsize) \
__KS_TYPE(type_t) \
__KS_BASIC(type_t, __bufsize) \
__KS_GETC(__read, __bufsize) \
__KS_GETUNTIL(__read, __bufsize)

#define __KSEQ_BASIC(type_t) \
static inline kseq_t *kseq_init(type_t fd) \
#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)

#define __KSEQ_BASIC(SCOPE, type_t) \
SCOPE kseq_t *kseq_init(type_t fd) \
{ \
kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
s->f = ks_init(fd); \
return s; \
} \
static inline void kseq_rewind(kseq_t *ks) \
{ \
ks->last_char = 0; \
ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
} \
static inline void kseq_destroy(kseq_t *ks) \
SCOPE void kseq_destroy(kseq_t *ks) \
{ \
if (!ks) return; \
free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
Expand All @@ -167,44 +168,46 @@ typedef struct __kstring_t {
-1 end-of-file
-2 truncated quality string
*/
#define __KSEQ_READ \
static int kseq_read(kseq_t *seq) \
{ \
int c; \
kstream_t *ks = seq->f; \
#define __KSEQ_READ(SCOPE) \
SCOPE int kseq_read(kseq_t *seq) \
{ \
int c; \
kstream_t *ks = seq->f; \
if (seq->last_char == 0) { /* then jump to the next header line */ \
while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
if (c == -1) return -1; /* end of file */ \
seq->last_char = c; \
} /* the first header char has been read */ \
seq->comment.l = seq->seq.l = seq->qual.l = 0; \
if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
if (c == -1) return -1; /* end of file */ \
seq->last_char = c; \
} /* else: the first header char has been read in the previous call */ \
seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
seq->seq.m = 256; \
seq->seq.s = (char*)malloc(seq->seq.m); \
} \
while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
if (isgraph(c)) { /* printable non-space character */ \
if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
seq->seq.m = seq->seq.l + 2; \
kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
} \
seq->seq.s[seq->seq.l++] = (char)c; \
} \
} \
if (c == '\n') continue; /* skip empty lines */ \
seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
} \
if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
if (c != '+') return seq->seq.l; /* FASTA */ \
if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
seq->qual.m = seq->seq.m; \
seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
} \
if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
seq->seq.m = seq->seq.l + 2; \
kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
} \
seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
if (c != '+') return seq->seq.l; /* FASTA */ \
if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \
seq->qual.m = seq->seq.m; \
seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
} \
while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
if (c == -1) return -2; /* we should not stop here */ \
while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
if (c == -1) return -2; /* error: no quality string */ \
while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
seq->last_char = 0; /* we have not come to the next header line */ \
if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
return seq->seq.l; \
if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
return seq->seq.l; \
}

#define __KSEQ_TYPE(type_t) \
Expand All @@ -214,10 +217,19 @@ typedef struct __kstring_t {
kstream_t *f; \
} kseq_t;

#define KSEQ_INIT(type_t, __read) \
KSTREAM_INIT(type_t, __read, 1048576) \
#define KSEQ_INIT2(SCOPE, type_t, __read) \
KSTREAM_INIT(type_t, __read, 16384) \
__KSEQ_TYPE(type_t) \
__KSEQ_BASIC(type_t) \
__KSEQ_READ
__KSEQ_BASIC(SCOPE, type_t) \
__KSEQ_READ(SCOPE)

#endif
#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)

#define KSEQ_DECLARE(type_t) \
__KS_TYPE(type_t) \
__KSEQ_TYPE(type_t) \
extern kseq_t *kseq_init(type_t fd); \
void kseq_destroy(kseq_t *ks); \
int kseq_read(kseq_t *seq);

#endif
4 changes: 2 additions & 2 deletions src/parse_phylip.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,14 @@ void fill_in_unambiguous_gaps_in_parent_from_children(int parent_sequence_index,
for(child_counter = 0 ; child_counter < num_children ; child_counter++)
{
int child_index = child_sequence_indices[child_counter];
if(!(sequences[child_index][snp_counter] == 'N' || sequences[child_index][snp_counter] == '-' ))
if(!( toupper(sequences[child_index][snp_counter]) == 'N' || sequences[child_index][snp_counter] == '-' ))
{
real_base_found = 1;
break;
}
}

if(real_base_found == 0 && sequences[parent_sequence_index][snp_counter] != 'N' && sequences[parent_sequence_index][snp_counter] != '-')
if(real_base_found == 0 && toupper(sequences[parent_sequence_index][snp_counter]) != 'N' && sequences[parent_sequence_index][snp_counter] != '-')
{
sequences[parent_sequence_index][snp_counter] = 'N';
}
Expand Down
2 changes: 1 addition & 1 deletion src/snp_sites.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ int generate_snp_sites(char filename[], int exclude_gaps, char suffix[])

strcat(filename_without_directory,suffix);

create_vcf_file(filename_without_directory, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples,internal_nodes);
create_vcf_file(filename_without_directory, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples,internal_nodes,1);
create_phylip_of_snp_sites(filename_without_directory, number_of_snps, bases_for_snps, sequence_names, number_of_samples,internal_nodes);
create_fasta_of_snp_sites(filename_without_directory, number_of_snps, bases_for_snps, sequence_names, number_of_samples,internal_nodes);

Expand Down
12 changes: 6 additions & 6 deletions src/vcf.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#include "parse_phylip.h"


void create_vcf_file(char filename[], int snp_locations[],int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples,int internal_nodes[])
void create_vcf_file(char filename[], int snp_locations[],int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples,int internal_nodes[], int offset)
{
FILE *vcf_file_pointer;
char * base_filename;
Expand All @@ -37,16 +37,16 @@ void create_vcf_file(char filename[], int snp_locations[],int number_of_snps, ch

vcf_file_pointer=fopen(strcat(base_filename,".vcf"), "w");
output_vcf_header(vcf_file_pointer,sequence_names, number_of_samples,internal_nodes);
output_vcf_snps(vcf_file_pointer, bases_for_snps, snp_locations, number_of_snps, number_of_samples,internal_nodes);
output_vcf_snps(vcf_file_pointer, bases_for_snps, snp_locations, number_of_snps, number_of_samples,internal_nodes,offset);
fclose(vcf_file_pointer);
}

void output_vcf_snps(FILE * vcf_file_pointer, char ** bases_for_snps, int * snp_locations, int number_of_snps, int number_of_samples,int internal_nodes[])
void output_vcf_snps(FILE * vcf_file_pointer, char ** bases_for_snps, int * snp_locations, int number_of_snps, int number_of_samples,int internal_nodes[], int offset)
{
int i;
for(i=0; i < number_of_snps; i++)
{
output_vcf_row(vcf_file_pointer, bases_for_snps[i], snp_locations[i], number_of_samples,internal_nodes);
output_vcf_row(vcf_file_pointer, bases_for_snps[i], snp_locations[i], number_of_samples,internal_nodes, offset);
}
}

Expand All @@ -69,7 +69,7 @@ void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int num
fprintf( vcf_file_pointer, "\n");
}

void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_location, int number_of_samples,int internal_nodes[])
void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_location, int number_of_samples,int internal_nodes[], int offset)
{
char reference_base = bases_for_snp[0];
char alt_bases[30];
Expand All @@ -82,7 +82,7 @@ void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_locat
fprintf( vcf_file_pointer, "1\t");

// Position
fprintf( vcf_file_pointer, "%d\t", (int) snp_location +1);
fprintf( vcf_file_pointer, "%d\t", (int) snp_location + offset);

//ID
fprintf( vcf_file_pointer, ".\t");
Expand Down
6 changes: 3 additions & 3 deletions src/vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
#define _VCF_H_

void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int number_of_samples,int internal_nodes[]);
void create_vcf_file(char filename[], int snp_locations[], int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples,int internal_nodes[]);
void output_vcf_snps(FILE * vcf_file_pointer, char ** bases_for_snps, int * snp_locations, int number_of_snps, int number_of_samples,int internal_nodes[]);
void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_location, int number_of_samples,int internal_nodes[]);
void create_vcf_file(char filename[], int snp_locations[], int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples,int internal_nodes[], int offset);
void output_vcf_snps(FILE * vcf_file_pointer, char ** bases_for_snps, int * snp_locations, int number_of_snps, int number_of_samples,int internal_nodes[], int offset);
void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_location, int number_of_samples,int internal_nodes[], int offset);
void output_vcf_row_samples_bases(FILE * vcf_file_pointer, char reference_base, char * bases_for_snp, int number_of_samples,int internal_nodes[]);
void alternative_bases(char reference_base, char * bases_for_snp, char alt_bases[], int number_of_samples);
int check_if_char_in_string(char search_string[], char target_char, int search_string_length);
Expand Down
Loading

0 comments on commit 9f5be3a

Please sign in to comment.