-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile_whatshap_WG
70 lines (60 loc) · 2.17 KB
/
Snakefile_whatshap_WG
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import glob
import peds
import uuid
### configure
output_dir="output"
wgs_interval= "ref/hg38.wgs_interval.bed"
hg38_ref="ref/Homo_sapiens_assembly38.fasta"
ref_dict=os.path.splitext(hg38_ref)[0]+'.dict'
### Define trios
ped_dir = "ped_files"
ped_files = glob.glob(ped_dir + "/*.ped")
families = {}
for fn in ped_files:
f=peds.open_ped(fn)[0]
families[f.id]=f
fam_ids = [line.strip() for line in open("8trios.lst", "r")]
child_ids = [[person.id for person in families[fid] if families[fid].get_father(person) ][0] for fid in fam_ids]
CHILD_DICT=dict(zip(fam_ids,child_ids))
# print (CHILD_DICT)
# exit(0)
rule all:
input:
expand(output_dir +"/phase_child/{fam}_child.phased.vcf.gz", fam=fam_ids),
expand(output_dir +"/phase_trios/{fam}.phased.vcf.gz", fam=fam_ids)
rule phase_child:
input:
bams=lambda w: expand(output_dir +"/cram/{id}.cram", id=[CHILD_DICT[w.fam]]),
bais=lambda w: expand(output_dir +"/cram/{id}.cram.crai", id=[CHILD_DICT[w.fam]]),
ref=hg38_ref,
vcf=output_dir +"/glnexus/{fam}.dv_combined.vcf.gz"
output:
vcf= output_dir +"/phase_child/{fam}_child.phased.vcf.gz"
benchmark:
output_dir +"/benchmark/phase_child/{fam}.tsv"
resources:
threads=2,
mem_mb = 40*1000,
runtime= "7d"
shell: """
whatshap phase -o {output.vcf} --tag=PS --indels --reference={input.ref} {input.vcf} {input.bams}
"""
rule phase_trios:
input:
bams=lambda w: expand(output_dir +"/cram/{id}.cram", id=[person.id for person in families[w.fam]]),
bais=lambda w: expand(output_dir +"/cram/{id}.cram.crai", id=[person.id for person in families[w.fam]]),
ped=ped_dir + "/{fam}.ped",
ref=hg38_ref,
vcf=output_dir +"/glnexus/{fam}.dv_combined.vcf.gz"
output:
vcf= output_dir +"/phase_trios/{fam}.phased.vcf.gz"
benchmark:
output_dir +"/benchmark/phase_trios/{fam}.tsv"
resources:
threads=2,
mem_mb = 160*1000,
runtime= "7d"
shell: """
whatshap phase -o {output.vcf} --tag=PS --indels --ped {input.ped} --reference={input.ref} {input.vcf} {input.bams}
"""
### add whatshap stats