Skip to content

Commit

Permalink
Merge pull request #87 from andrewjpage/master
Browse files Browse the repository at this point in the history
translate snps and gaps to original genome space coords
  • Loading branch information
andrewjpage committed Jul 4, 2013
2 parents 4396e1a + b3ff533 commit df4ebb5
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 13 deletions.
33 changes: 22 additions & 11 deletions src/branch_sequences.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ void get_likelihood_for_windows(char * child_sequence, int length_of_sequence, i

int cutoff = calculate_cutoff(branch_genome_size, window_size, number_of_branch_snps);

number_of_blocks = get_blocks(block_coordinates, length_of_original_genome, snp_site_coords, number_of_branch_snps, window_size, cutoff, original_sequence);
number_of_blocks = get_blocks(block_coordinates, length_of_original_genome, snp_site_coords, number_of_branch_snps, window_size, cutoff, original_sequence,snp_locations,length_of_sequence);


for(i = 0; i < number_of_blocks; i++)
Expand Down Expand Up @@ -484,14 +484,14 @@ void get_likelihood_for_windows(char * child_sequence, int length_of_sequence, i
}
}

int extend_upper_part_of_window(int starting_coord, int initial_max_coord, int genome_size, char * original_sequence)
int extend_upper_part_of_window(int starting_coord, int initial_max_coord, int genome_size, int * gaps_in_original_genome_space)
{
int max_snp_sliding_window_counter = initial_max_coord;
int upper_offset = 0;
int j = 0;
for(j = starting_coord; (j < initial_max_coord + upper_offset) && (j < genome_size); j++)
{
if(original_sequence[j] == 'N' || original_sequence[j] == '-' )
if(gaps_in_original_genome_space[j] == 1)
{
upper_offset++;
}
Expand All @@ -501,14 +501,14 @@ int extend_upper_part_of_window(int starting_coord, int initial_max_coord, int g
return max_snp_sliding_window_counter;
}

int extend_lower_part_of_window(int starting_coord, int initial_min_coord, int genome_size, char * original_sequence)
int extend_lower_part_of_window(int starting_coord, int initial_min_coord, int genome_size, int * gaps_in_original_genome_space)
{
int lower_offset = 0;
int snp_sliding_window_counter = initial_min_coord;
int j = 0;
for(j = starting_coord; (j > 0) && j > (initial_min_coord - lower_offset) ; j--)
for(j = starting_coord; (j >= 0) && j > (initial_min_coord - lower_offset) && (initial_min_coord - lower_offset >= 0); j--)
{
if(original_sequence[j] == 'N' || original_sequence[j] == '-' )
if(gaps_in_original_genome_space[j] == 1)
{
lower_offset++;
}
Expand All @@ -518,7 +518,7 @@ int extend_lower_part_of_window(int starting_coord, int initial_min_coord, int g
}


int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,int number_of_branch_snps, int window_size, int cutoff, char * original_sequence)
int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,int number_of_branch_snps, int window_size, int cutoff, char * original_sequence, int * snp_locations, int number_of_snps)
{
// Set up the window counter with 1 value per base in the branch
int * window_count;
Expand All @@ -529,6 +529,18 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i
window_count[i] = 0;
}

// Integer array with location of gaps
int * gaps_in_original_genome_space;
gaps_in_original_genome_space = (int *) calloc((genome_size+1),sizeof(int));
int x =0;
for(x=0; x< number_of_snps; x++)
{
if(original_sequence[x] == 'N' || original_sequence[x] == '-' )
{
gaps_in_original_genome_space[snp_locations[x]] = 1;
}
}


// create the pileup of the snps and their sphere of influence
int snp_counter = 0;
Expand All @@ -538,7 +550,7 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i
// Lower bound of the window around a snp
int snp_sliding_window_counter = snp_site_coords[snp_counter]-(window_size/2);

snp_sliding_window_counter = extend_lower_part_of_window(snp_site_coords[snp_counter] - 1 , snp_sliding_window_counter, genome_size, original_sequence);
snp_sliding_window_counter = extend_lower_part_of_window(snp_site_coords[snp_counter] - 1 , snp_sliding_window_counter, genome_size, gaps_in_original_genome_space);

if(snp_sliding_window_counter < 0)
{
Expand All @@ -547,7 +559,7 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i

// Upper bound of the window around a snp
int max_snp_sliding_window_counter = snp_site_coords[snp_counter]+(window_size/2);
max_snp_sliding_window_counter = extend_upper_part_of_window(snp_site_coords[snp_counter] + 1, max_snp_sliding_window_counter, genome_size, original_sequence);
max_snp_sliding_window_counter = extend_upper_part_of_window(snp_site_coords[snp_counter] + 1, max_snp_sliding_window_counter, genome_size, gaps_in_original_genome_space);

if(max_snp_sliding_window_counter>genome_size)
{
Expand All @@ -565,8 +577,6 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i
int block_lower_bound = 0;
// Scan across the pileup and record where blocks are above the cutoff

// Skip over gaps

for(i = 0; i < genome_size; i++)
{
// Just entered the start of a block
Expand All @@ -586,6 +596,7 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i
}

}
free(gaps_in_original_genome_space);
free(window_count);
return number_of_blocks;

Expand Down
5 changes: 3 additions & 2 deletions src/branch_sequences.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ int flag_smallest_log_likelihood_recombinations(int ** candidate_blocks, int num
int calculate_number_of_bases_in_recombations_excluding_gaps(int ** block_coordinates, int num_blocks,char * child_sequence, int * snp_locations,int length_of_original_genome);
void carry_unambiguous_gaps_up_tree(newick_node *root);
void move_blocks_inwards_while_likelihood_improves(int number_of_blocks,int ** block_coordinates, int min_snps, int * snp_site_coords, int number_of_branch_snps,char * branch_snp_sequence, int * snp_locations, int branch_genome_size,char * child_sequence, int length_of_sequence, double * block_likelihoods, int cutoff_value);
int get_blocks(int ** block_coordinates, int branch_genome_size,int * snp_site_coords,int number_of_branch_snps, int window_size, int cutoff, char * original_sequence);

int get_blocks(int ** block_coordinates, int branch_genome_size,int * snp_site_coords,int number_of_branch_snps, int window_size, int cutoff, char * original_sequence, int * snp_locations, int number_of_snps);
int extend_lower_part_of_window(int starting_coord, int initial_min_coord, int genome_size, int * gaps_in_original_genome_space);
int extend_upper_part_of_window(int starting_coord, int initial_max_coord, int genome_size, int * gaps_in_original_genome_space);

#define WINDOW_SNP_MODE_TARGET 10
#define MIN_WINDOW_SIZE 100
Expand Down

0 comments on commit df4ebb5

Please sign in to comment.