Skip to content

Commit

Permalink
use alternate format of gff
Browse files Browse the repository at this point in the history
  • Loading branch information
jodyphelan committed May 6, 2019
1 parent 462bdcd commit dad5166
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 12 deletions.
8 changes: 4 additions & 4 deletions pathogenprofiler/bam.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ def __init__(self,bam_file,prefix,ref_file,platform="Illumina",threads=4):
def run_delly(self):
run_cmd("delly call -t DEL -g %(ref_file)s %(bam_file)s -o %(prefix)s.delly.bcf" % vars(self))
return delly_bcf("%(prefix)s.delly.bcf" % vars(self))
def call_variants(self,gff_file=None,bed_file=None,call_method="optimise",min_dp=10,threads=4,mixed_as_missing=False,**kwargs):
def call_variants(self,gff_file=None,bed_file=None,call_method="optimise",min_dp=10,threads=4,mixed_as_missing=False,af=0.0,**kwargs):
add_arguments_to_self(self,locals())
self.gbcf_file = "%s.gbcf" % self.prefix
self.missing_bcf_file = "%s.missing.bcf" % self.prefix
self.gbcf(prefix=self.prefix,call_method=call_method,min_dp=min_dp,threads=threads,vtype="both",bed_file=bed_file,low_dp_as_missing=True)

self.variant_bcf_file = "%s.bcf" % self.prefix
self.del_bed = bcf(self.gbcf_file).del_pos2bed()
self.mix_cmd = "| bcftools +setGT -- -t q -i 'GT=\"het\" & AD[:1]/(AD[:0]+AD[:1])<0.7' -n . |" if mixed_as_missing else ""
run_cmd("bcftools view %(gbcf_file)s %(mix_cmd)s| bcftools view -T ^%(del_bed)s -g miss -O b -o %(missing_bcf_file)s" % vars(self))
run_cmd("bcftools view %(gbcf_file)s %(mix_cmd)s| bcftools view -g ^miss -c 1 -O b -o %(variant_bcf_file)s" % vars(self))
self.af_filter_cmd = "| bcftools view -i 'AF>%s'" % af
run_cmd("bcftools view %(gbcf_file)s %(mix_cmd)s %(af_filter_cmd)s | bcftools view -T ^%(del_bed)s -g miss -O b -o %(missing_bcf_file)s" % vars(self))
run_cmd("bcftools view %(gbcf_file)s %(mix_cmd)s %(af_filter_cmd)s | bcftools view -g ^miss -c 1 -O b -o %(variant_bcf_file)s" % vars(self))

if gff_file and filecheck(gff_file):
self.gff_file = gff_file
Expand Down
4 changes: 2 additions & 2 deletions pathogenprofiler/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from .barcode import *
from .fastq import *
import re
def profiler(conf_file,prefix,r1=None,r2=None,bam_file=None,call_method="low",min_depth=10,platform="Illumina",mapper="bwa",threads=4,run_delly=False):
def profiler(conf_file,prefix,r1=None,r2=None,bam_file=None,call_method="low",min_depth=10,platform="Illumina",mapper="bwa",threads=4,run_delly=False,af=0.0):
conf = json.load(open(conf_file))
for f in conf:
filecheck(conf[f])
Expand All @@ -26,7 +26,7 @@ def profiler(conf_file,prefix,r1=None,r2=None,bam_file=None,call_method="low",mi
else:
log("Using %s\n\nPlease ensure that this BAM was made using the same reference as in the database.\nIf you are not sure what reference was used it is best to remap the reads." % bam_file)
bam_obj = bam(bam_file,prefix,conf["ref"],platform=platform)
bcf_obj = bam_obj.call_variants(prefix=prefix+".targets",call_method=call_method,gff_file=conf["gff"],bed_file=conf["bed"],mixed_as_missing=False if platform == "Illumina" else True,threads=threads,min_dp=min_depth)
bcf_obj = bam_obj.call_variants(prefix=prefix+".targets",call_method=call_method,gff_file=conf["gff"],bed_file=conf["bed"],mixed_as_missing=False if platform == "Illumina" else True,threads=threads,min_dp=min_depth,af=af)
csq = bcf_obj.load_csq(ann_file=conf["ann"])
bam_obj.flagstat()

Expand Down
13 changes: 7 additions & 6 deletions pathogenprofiler/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,30 +95,31 @@ def load_csq(self,ann_file=None):
adr = {alleles[i]:d/sum(ad) for i,d in enumerate(ad)}
if row[i+1][0]=="@": continue
if info[-1]=="pseudogene": continue
gene = info[1]
gene_id = info[2]
gene_name = info[1]
if info[0]=="intron":continue
if info[0]=="coding_sequence":
cng = "%s%s>%s" % (ann_pos,call1,call2)
variants[sample].append({"sample":sample,"gene_id":ann_gene,"chr":chrom,"genome_pos":pos,"type":"non_coding","change":cng,"freq":adr[call2]})
elif "missense" in info[0] or "start_lost" in info[0] or "stop_gained" in info[0]:
variants[sample].append({"sample":sample,"gene_id":gene,"chr":chrom,"genome_pos":pos,"type":info[0],"change":info[5],"freq":adr[call2]})
variants[sample].append({"sample":sample,"gene_id":gene_id,"gene_name":gene_name,"chr":chrom,"genome_pos":pos,"type":info[0],"change":info[5],"freq":adr[call2]})
elif "synonymous" in info[0] or info[0]=="stop_retained":
change_num,ref_nuc,alt_nuc = parse_mutation(info[6])
change = "%s%s>%s" % (ann_pos,ref_nuc,alt_nuc) if ann_pos else "%s%s>%s" % (pos,ref_nuc,alt_nuc)
variants[sample].append({"sample":sample,"gene_id":gene,"chr":chrom,"genome_pos":pos,"type":info[0],"change":change,"freq":adr[call2]})
variants[sample].append({"sample":sample,"gene_id":gene_id,"gene_name":gene_name,"chr":chrom,"genome_pos":pos,"type":info[0],"change":change,"freq":adr[call2]})
elif "frame" in info[0] or "stop_lost" in info[0]:
if len(info)<6:
if chrom in ann and pos in ann[chrom]:
change = "%s%s>%s" % (pos,ref,call2)
variants[sample].append({"sample":sample,"gene_id":gene,"chr":chrom,"genome_pos":pos,"type":info[0],"change":change,"freq":adr[call2]})
variants[sample].append({"sample":sample,"gene_id":gene_id,"gene_name":gene_name,"chr":chrom,"genome_pos":pos,"type":info[0],"change":change,"freq":adr[call2]})
else:
variants[sample].append({"sample":sample,"gene_id":gene,"chr":chrom,"genome_pos":pos,"type":info[0],"change":info[6],"freq":adr[call2]})
variants[sample].append({"sample":sample,"gene_id":gene_id,"gene_name":gene_name,"chr":chrom,"genome_pos":pos,"type":info[0],"change":info[6],"freq":adr[call2]})
elif info[0]=="non_coding":
if chrom in ann and pos in ann[chrom]:
gene = ann[chrom][pos][0]
gene_pos = ann[chrom][pos][1]
change = "%s%s>%s" % (gene_pos,ref,call2)
variants[sample].append({"sample":sample,"gene_id":gene,"chr":chrom,"genome_pos":pos,"type":info[0],"change":change,"freq":adr[call2]})
variants[sample].append({"sample":sample,"gene_id":gene,"gene_name":gene_name,"chr":chrom,"genome_pos":pos,"type":info[0],"change":change,"freq":adr[call2]})
else:
log(line)
log(info[0]+"\n")
Expand Down

0 comments on commit dad5166

Please sign in to comment.