From 1507cadf4e4137cd3dd1f5f5d54fd4d993aa9ce3 Mon Sep 17 00:00:00 2001 From: Jody Phelan Date: Fri, 12 Apr 2019 14:57:10 +0100 Subject: [PATCH] add get_bed_gt from VCF --- meta.yaml | 8 +++++--- pathogenprofiler/utils.py | 7 +++---- pathogenprofiler/vcf.py | 26 +++++++++++++++++++++++++- scripts/splitchr.py | 1 - 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/meta.yaml b/meta.yaml index 7c59851..4fe0a83 100644 --- a/meta.yaml +++ b/meta.yaml @@ -13,7 +13,7 @@ source: build: script: python -m pip install --no-deps --ignore-installed . noarch: python - number: 0 + number: 1 requirements: host: @@ -26,8 +26,8 @@ requirements: - bowtie2 - minimap2 - parallel - - samtools>=1.9 - - bcftools>=1.9 + - samtools + - bcftools - tqdm - delly @@ -36,6 +36,8 @@ test: - pathogenprofiler commands: - splitchr.py -h + - samtools + - bcftools about: home: https://github.com/jodyphelan/{{ name }} diff --git a/pathogenprofiler/utils.py b/pathogenprofiler/utils.py index a1f399b..06957d6 100644 --- a/pathogenprofiler/utils.py +++ b/pathogenprofiler/utils.py @@ -59,12 +59,12 @@ def get_random_file(prefix = None,extension=None): randint = rand_generator.randint(1,999999) if prefix: if extension: - return "%s.%s.%s" % (prefix,randint,extension) + return "%s.%s%s" % (prefix,randint,extension) else: return "%s.%s.txt" % (prefix,randint) else: if extension: - return "%s.tmp.%s" % (randint,extension) + return "%s.tmp%s" % (randint,extension) else: return "%s.tmp.txt" % (randint) @@ -108,7 +108,6 @@ def load_bed(filename,columns,key1,key2=None,intasint=False): return results def split_bed(bed_file,size,reformat=False): - bed_regions = {} for l in open(filecheck(bed_file)): row = l.rstrip().split() chrom,start,end = row[0],int(row[1]),int(row[2]) @@ -292,7 +291,7 @@ def which(program): def is_exe(fpath): return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - fpath, fname = os.path.split(program) + fpath = os.path.split(program)[0] if fpath: if is_exe(program): return program diff --git a/pathogenprofiler/vcf.py b/pathogenprofiler/vcf.py index 03e5335..483e6d4 100644 --- a/pathogenprofiler/vcf.py +++ b/pathogenprofiler/vcf.py @@ -165,7 +165,31 @@ def get_positions(self): row = l.split() results.append((row[0],int(row[1]))) return results - + def get_bed_gt(self,bed_file,ref_file): + add_arguments_to_self(self,locals()) + cmd = "bcftools convert --gvcf2vcf -f %(ref_file)s %(filename)s | bcftools view -T %(bed_file)s | bcftools query -f '%%CHROM\\t%%POS\\t%%REF\\t%%ALT[\\t%%GT\\t%%AD]\\n'" % vars(self) + results = defaultdict(lambda : defaultdict(dict)) + for l in cmd_out(cmd): + #Chromosome 4348079 0/0 51 + chrom,pos,ref,alt,gt,ad = l.rstrip().split() + pos =int(pos) + d = {} + alts = alt.split(",") + ad = [int(x) for x in ad.split(",")] if ad!="." else [100] + if gt=="0/0": + d[ref] = ad[0] + elif gt=="./.": + d[ref] = 0 + else: + for i,a in enumerate([ref]+alts): + d[a] = ad[i] + results[chrom][pos] = d + bed = load_bed(bed_file,[1,3,5],1,3) + for chrom in bed: + for pos in bed[chrom]: + if int(pos) not in results[chrom]: + results[chrom][int(pos)] = {bed[chrom][pos][2]:50} + return results class delly_bcf(bcf): def __init__(self,filename): diff --git a/scripts/splitchr.py b/scripts/splitchr.py index 3346f48..63428b9 100755 --- a/scripts/splitchr.py +++ b/scripts/splitchr.py @@ -1,5 +1,4 @@ #! /usr/bin/env python -import sys import pathogenprofiler as pp import argparse