Skip to content

Commit

Permalink
bugfix
Browse files Browse the repository at this point in the history
  • Loading branch information
jodyphelan committed Oct 17, 2024
1 parent e255320 commit 666563b
Showing 1 changed file with 18 additions and 11 deletions.
29 changes: 18 additions & 11 deletions scripts/combine_vcf_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def get_alleles(
continue
if read.query_qualities[read_pos] < 12:
continue

if read_nt.islower():
read_nt = read.query_sequence[read_pos].upper()

Expand Down Expand Up @@ -107,6 +108,14 @@ def get_codon_pos(chrom: str,pos: int,exons: List[Exon]):
exon.id = f"{gene.gene_id}_exon{i+1}"
exons.append(exon)

def has_md_tag(bam):
for read in bam.fetch():
break
if read.has_tag('MD'):
return True
else:
return False

ref = pysam.FastaFile(args.ref)
coding_variants = defaultdict(list)
other_variants = []
Expand All @@ -117,6 +126,7 @@ def get_codon_pos(chrom: str,pos: int,exons: List[Exon]):
vcf.header.add_line('##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">')

for var in vcf:
logging.debug(var)
gene,cpos = get_codon_pos(var.chrom,var.pos,exons)
if var.rlen!=1 or len(var.alts[0])!=1:
other_variants.append(var)
Expand All @@ -125,15 +135,12 @@ def get_codon_pos(chrom: str,pos: int,exons: List[Exon]):
else:
coding_variants[(gene,cpos)].append(var)

def has_md_tag(bam):
for read in bam.fetch():
break
if read.has_tag('MD'):
return True
else:
return False

logging.debug(coding_variants[('PVP01_1429500_exon2', 164)][0])

for key,variants in coding_variants.items():
logging.debug((key,[str(v) for v in variants]))
logging.debug(coding_variants[('PVP01_1429500_exon2', 164)][0])
if len(variants)==1:
other_variants.append(variants[0])
continue
Expand All @@ -145,11 +152,11 @@ def has_md_tag(bam):
ref_hap = ''.join([ref.fetch(p.chrom,p.pos-1,p.pos) for p in positions])
if args.bam and has_md_tag(pysam.AlignmentFile(args.bam)):
haplotypes_by_strand = get_haplotype_counts(pysam.AlignmentFile(args.bam),positions,bystrand=True)

logging.debug(haplotypes_by_strand)
else:
# alt is just a combination of all the alt alleles
alt_hap = ''.join([v.alts[0] for v in variants])

logging.debug('adkasjdaosdjaoisd')
ds = defaultdict(list)
for v in variants:
ds[v.pos].append(v.alts[0])
Expand Down Expand Up @@ -204,8 +211,8 @@ def has_md_tag(bam):
if 'ADF' in var.samples[0]:
print(var)
print(hap_fwd,hap_rev)
var.samples[0]['ADF'] = (ref_fwd,hap_fwd)
var.samples[0]['ADR'] = (ref_rev,hap_rev)
variant.samples[0]['ADF'] = (ref_fwd,hap_fwd)
variant.samples[0]['ADR'] = (ref_rev,hap_rev)
if 'AD' in variant.samples[0]:
variant.samples[0]['AD'] = [dp - count, hap_fwd + hap_rev]

Expand Down

0 comments on commit 666563b

Please sign in to comment.