Skip to content

Commit

Permalink
add get_bed_gt from VCF
Browse files Browse the repository at this point in the history
  • Loading branch information
Jody Phelan committed Apr 12, 2019
1 parent 071dee0 commit 1507cad
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 9 deletions.
8 changes: 5 additions & 3 deletions meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ source:
build:
script: python -m pip install --no-deps --ignore-installed .
noarch: python
number: 0
number: 1

requirements:
host:
Expand All @@ -26,8 +26,8 @@ requirements:
- bowtie2
- minimap2
- parallel
- samtools>=1.9
- bcftools>=1.9
- samtools
- bcftools
- tqdm
- delly

Expand All @@ -36,6 +36,8 @@ test:
- pathogenprofiler
commands:
- splitchr.py -h
- samtools
- bcftools

about:
home: https://github.com/jodyphelan/{{ name }}
Expand Down
7 changes: 3 additions & 4 deletions pathogenprofiler/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ def get_random_file(prefix = None,extension=None):
randint = rand_generator.randint(1,999999)
if prefix:
if extension:
return "%s.%s.%s" % (prefix,randint,extension)
return "%s.%s%s" % (prefix,randint,extension)
else:
return "%s.%s.txt" % (prefix,randint)
else:
if extension:
return "%s.tmp.%s" % (randint,extension)
return "%s.tmp%s" % (randint,extension)
else:
return "%s.tmp.txt" % (randint)

Expand Down Expand Up @@ -108,7 +108,6 @@ def load_bed(filename,columns,key1,key2=None,intasint=False):
return results

def split_bed(bed_file,size,reformat=False):
bed_regions = {}
for l in open(filecheck(bed_file)):
row = l.rstrip().split()
chrom,start,end = row[0],int(row[1]),int(row[2])
Expand Down Expand Up @@ -292,7 +291,7 @@ def which(program):
def is_exe(fpath):
return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

fpath, fname = os.path.split(program)
fpath = os.path.split(program)[0]
if fpath:
if is_exe(program):
return program
Expand Down
26 changes: 25 additions & 1 deletion pathogenprofiler/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,31 @@ def get_positions(self):
row = l.split()
results.append((row[0],int(row[1])))
return results

def get_bed_gt(self,bed_file,ref_file):
add_arguments_to_self(self,locals())
cmd = "bcftools convert --gvcf2vcf -f %(ref_file)s %(filename)s | bcftools view -T %(bed_file)s | bcftools query -f '%%CHROM\\t%%POS\\t%%REF\\t%%ALT[\\t%%GT\\t%%AD]\\n'" % vars(self)
results = defaultdict(lambda : defaultdict(dict))
for l in cmd_out(cmd):
#Chromosome 4348079 0/0 51
chrom,pos,ref,alt,gt,ad = l.rstrip().split()
pos =int(pos)
d = {}
alts = alt.split(",")
ad = [int(x) for x in ad.split(",")] if ad!="." else [100]
if gt=="0/0":
d[ref] = ad[0]
elif gt=="./.":
d[ref] = 0
else:
for i,a in enumerate([ref]+alts):
d[a] = ad[i]
results[chrom][pos] = d
bed = load_bed(bed_file,[1,3,5],1,3)
for chrom in bed:
for pos in bed[chrom]:
if int(pos) not in results[chrom]:
results[chrom][int(pos)] = {bed[chrom][pos][2]:50}
return results

class delly_bcf(bcf):
def __init__(self,filename):
Expand Down
1 change: 0 additions & 1 deletion scripts/splitchr.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#! /usr/bin/env python
import sys
import pathogenprofiler as pp
import argparse

Expand Down

0 comments on commit 1507cad

Please sign in to comment.