Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate read end clipping in to allelecounter #60

Open
wants to merge 29 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions c/src/alleleCounter.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
#include "dbg.h"

static int min_base_q = 20;
static int min_map_q = 35;
static int min_map_q = 45;
static int end_clip = 5;
static char *hts_file;
static char *loci_file;
static char *out_file;
Expand Down Expand Up @@ -62,6 +63,7 @@ void alleleCounter_print_usage (int exit_code){
printf (" can't be found alleleCounter may fail to work correctly.\n");
printf (" -m --min-base-qual [int] Minimum base quality [Default: %d].\n",min_base_q);
printf (" -q --min-map-qual [int] Minimum mapping quality [Default: %d].\n",min_map_q);
printf (" -e --end-clip-reads [int] Amount of bases to clip from end of reads [Default: %d].\n",end_clip);
printf (" -c --contig [string] Limit calling to named contig.\n");
printf (" -d --dense-snps Improves performance where many positions are close together \n");
printf (" -x --is-10x Enables 10X processing mode.\n");
Expand Down Expand Up @@ -93,6 +95,7 @@ void alleleCounter_setup_options(int argc, char *argv[]){
{"output-file",required_argument , 0, 'o'},
{"min-base-qual", required_argument, 0, 'm'},
{"min-map-qual", required_argument, 0, 'q'},
{"end_clip", required_argument, 0, 'e'},
{"is-snp6", required_argument, 0, 's'},
{"is-10x", required_argument, 0, 'x'},
{"contig", required_argument, 0, 'c'},
Expand All @@ -108,7 +111,7 @@ void alleleCounter_setup_options(int argc, char *argv[]){
int iarg = 0;

//Iterate through options
while((iarg = getopt_long(argc, argv, "f:F:l:b:m:o:q:r:c:hdsvx", long_opts, &index)) != -1){
while((iarg = getopt_long(argc, argv, "f:F:l:b:m:o:q:r:c:e:hdsvx", long_opts, &index)) != -1){
switch(iarg){
case 'h':
alleleCounter_print_usage(0);
Expand All @@ -134,6 +137,10 @@ void alleleCounter_setup_options(int argc, char *argv[]){
min_map_q = atoi(optarg);
break;

case 'e':
end_clip = atoi(optarg);
break;

case 'b':
hts_file = optarg;
break;
Expand Down Expand Up @@ -393,6 +400,8 @@ int main(int argc, char *argv[]){

bam_access_exc_flag(exc_flag);

bam_access_end_clip(end_clip);

//Open output file for writing
FILE *output = fopen(out_file,"w");
check(output != NULL, "Error opening file %s for write.",out_file);
Expand Down
26 changes: 25 additions & 1 deletion c/src/bam_access.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ int include_dup = 0;
int include_se = 0;
int min_base_qual = 20;
int min_map_qual = 35;
int end_c=5; // might want to change default to 0.
int inc_flag = 3;
int exc_flag = 3852;
int maxitercnt = 1000000000; //Overrride internal maxcnt for iterator!
Expand Down Expand Up @@ -118,9 +119,26 @@ void pileupCounts(const bam_pileup1_t *pil, int n_plp, loci_stats *stats){
for(i=0;i<n_plp;i++){
const bam_pileup1_t *p = pil + i;
int qual = bam_get_qual(p->b)[p->qpos];
//printf("Read posis %d\n" , p->qpos);
//printf("Read posis %d\n" , p->b->query->length);
int read_pos=p->qpos;
//printf("Read posis %d\n" ,read_pos);
//printf("End clip %d\n" ,end_c);
//the_seq=bam_get_seq(p->b);
uint32_t len = p->b->core.l_qseq;
int clip_it=0;
//skip if position is is less than is defined by the -e paramater
if(read_pos<end_c || read_pos>(len-end_c-1))
{
clip_it=1;
continue;
}


uint8_t c = bam_seqi(bam_get_seq(p->b), p->qpos);

int absent;
k = kh_put(strh, h, bam_get_qname(p->b), &absent);
k = kh_put(strh, h, bam_get_qname(p->b), &absent);
uint8_t pre_b;
if(!absent){ //Read already processed to get base processed (we only increment if base is different between overlapping read pairs)
k = kh_get(strh, h, bam_get_qname(p->b));
Expand Down Expand Up @@ -341,6 +359,7 @@ int bam_access_get_multi_position_base_counts(loci_stats **stats, int stats_coun
uint8_t *aux_val_bcode;
uint8_t *aux_val_umi;
//printf("Got another read \n");

if(b->core.qual < min_map_qual || (b->core.flag & exc_flag) || (b->core.flag & inc_flag) != inc_flag) continue;
//Additional check for properly paired reads - they must be in correct paired end orientation
if(inc_flag & BAM_FPROPER_PAIR){
Expand Down Expand Up @@ -492,6 +511,11 @@ void bam_access_min_map_qual(int qual){
return;
}

void bam_access_end_clip(int bases){
end_c = bases;
return;
}

void bam_access_inc_flag(int inc){
inc_flag = inc;
return;
Expand Down
2 changes: 2 additions & 0 deletions c/src/bam_access.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ void bam_access_inc_flag(int inc);

void bam_access_exc_flag(int exc);

void bam_access_end_clip(int base);

int bam_access_openhts(char *hts_file, char *ref_file);

int bam_access_get_position_base_counts(char *chr, int pos, loci_stats *stats,int is_10x,FILE *output);
Expand Down