-
Notifications
You must be signed in to change notification settings - Fork 2
/
preprocess-illumina.cwl
172 lines (152 loc) · 3.64 KB
/
preprocess-illumina.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env cwl-runner
# CWL v1.0 workflow document. The optional CWL features the document
# declares are listed under `requirements`.
cwlVersion: v1.0
class: Workflow

requirements:
  # Declared, although no step in this document uses `scatter`.
  ScatterFeatureRequirement: {}
  # NOTE(review): only needed if one of the `run` targets is itself a
  # Workflow; cannot be confirmed from this file alone.
  SubworkflowFeatureRequirement: {}
  # bwa_mem.InputFile lists two sources and merges them with linkMerge.
  MultipleInputFeatureRequirement: {}
  # Several step inputs (bwa_mem, rename_*) compute values via valueFrom.
  StepInputExpressionRequirement: {}
  # bwa_mem.InputFile uses a `${ ... }` JavaScript expression body.
  InlineJavascriptRequirement: {}
inputs:
  # Accession string passed to fetch_fastqs as `sra_accession`; also the
  # prefix used by the rename_* steps when naming reports and the BAM.
  illumina_accession:
    type: string
  # File passed to unpack_ref_genome as `file`.
  hg38:
    type: File
steps:
  # Runs unpack_ref_db.cwl on the `hg38` input; the resulting `hg_38_fa`
  # is consumed by bwa_mem as its Index.
  unpack_ref_genome:
    run: ./tools/unpack_ref_db.cwl
    in:
      file: hg38
    out: [hg_38_fa]

  # Obtains the FASTQ file(s) for the accession. The workflow output
  # `original_fastq2` is typed `File?`, so the second file may be absent.
  fetch_fastqs:
    run: ../bio-cwl-tools/sratoolkit/prefetch_fastq.cwl
    in:
      sra_accession: illumina_accession
    out: [fastq_file_1, fastq_file_2]

  # Runs fastp.cwl on the fetched reads; yields processed FASTQs plus
  # HTML and JSON reports.
  fastp:
    run: ../bio-cwl-tools/fastp/fastp.cwl
    in:
      fastq1: fetch_fastqs/fastq_file_1
      fastq2: fetch_fastqs/fastq_file_2
    out: [out_fastq1, out_fastq2, html_report, json_report]

  # Runs multiqc.cwl on the fastp JSON report.
  # NOTE(review): a single File is wired into `qc_files_array`. If
  # multiqc.cwl declares that input as an array, confirm the runner
  # accepts this or add `linkMerge: merge_nested` - TODO confirm.
  multiqc:
    run: ../bio-cwl-tools/multiqc/multiqc.cwl
    in:
      qc_files_array: fastp/json_report
    out: [multiqc_zip, multiqc_html]

  # Runs BWA-Mem.cwl with the unpacked reference as Index and the fastp
  # output reads as InputFile.
  bwa_mem:
    run: ../bio-cwl-tools/bwa/BWA-Mem.cwl
    in:
      Index: unpack_ref_genome/hg_38_fa
      InputFile:
        source:
          - fastp/out_fastq1
          - fastp/out_fastq2
        linkMerge: merge_flattened
        # Keeps only the truthy entries of the merged list (presumably
        # dropping a null out_fastq2 when there is no second read file).
        valueFrom: |
          ${
            return self.filter(function(x){return x});
          }
      Threads:
        # Constant thread count of 1.
        valueFrom: $(1)
    out: [reads_stdout]

  # Runs samtools_view_sam2bam.cwl on the bwa_mem output.
  samtools_view:
    run: ../bio-cwl-tools/samtools/samtools_view_sam2bam.cwl
    in:
      sam: bwa_mem/reads_stdout
    out: [bam]

  # Runs samtools_fastq.cwl on the BAM from samtools_view.
  samtools_fastq:
    run: ../bio-cwl-tools/samtools/samtools_fastq.cwl
    in:
      bam_sorted: samtools_view/bam
    out: [fastq]

  # The rename_* steps below all run rename.cwl, giving each artifact a
  # name derived from the accession: <illumina_accession><suffix>.
  rename_multiqc_html:
    run: ../bio-cwl-tools/util/rename.cwl
    in:
      srcfile: multiqc/multiqc_html
      newname:
        source: illumina_accession
        valueFrom: $(self)_multiqc.html
    out: [outfile]

  rename_multiqc_zip:
    run: ../bio-cwl-tools/util/rename.cwl
    in:
      srcfile: multiqc/multiqc_zip
      newname:
        source: illumina_accession
        valueFrom: $(self)_multiqc.zip
    out: [outfile]

  rename_fastp_html:
    run: ../bio-cwl-tools/util/rename.cwl
    in:
      srcfile: fastp/html_report
      newname:
        source: illumina_accession
        valueFrom: $(self)_fastp.html
    out: [outfile]

  rename_fastp_json:
    run: ../bio-cwl-tools/util/rename.cwl
    in:
      srcfile: fastp/json_report
      newname:
        source: illumina_accession
        valueFrom: $(self)_fastp.json
    out: [outfile]

  rename_bam:
    run: ../bio-cwl-tools/util/rename.cwl
    in:
      srcfile: samtools_view/bam
      newname:
        source: illumina_accession
        valueFrom: $(self).bam
    out: [outfile]
outputs:
  # FASTQ file(s) exactly as produced by fetch_fastqs. The second file is
  # optional (`File?`).
  original_fastq1:
    type: File
    format: edam:format_1930  # FASTQ
    outputSource: fetch_fastqs/fastq_file_1
  original_fastq2:
    type: File?
    format: edam:format_1930  # FASTQ
    outputSource: fetch_fastqs/fastq_file_2

  # BAM from samtools_view, renamed to <illumina_accession>.bam.
  bam:
    type: File
    format: edam:format_2572  # BAM
    outputSource: rename_bam/outfile

  # fastp reports, renamed to <illumina_accession>_fastp.{html,json}.
  fastp_html_report:
    type: File
    format: edam:format_2331  # HTML
    outputSource: rename_fastp_html/outfile
  fastp_json_report:
    type: File
    format: edam:format_3464  # JSON
    outputSource: rename_fastp_json/outfile

  # MultiQC artifacts, renamed to <illumina_accession>_multiqc.{html,zip}.
  multiqc_html:
    type: File
    format: edam:format_2331  # HTML
    outputSource: rename_multiqc_html/outfile
  multiqc_zip:
    type: File
    # This is the .zip archive, so it is labelled as ZIP rather than with
    # the JSON identifier used for the fastp JSON report.
    format: edam:format_3987  # ZIP
    outputSource: rename_multiqc_zip/outfile

  # FASTQ produced by samtools_fastq from the samtools_view BAM.
  mapped_fastq:
    type: File
    format: edam:format_1930  # FASTQ
    outputSource: samtools_fastq/fastq
# Prefix used by the `format:` fields of the workflow outputs
# (edam:format_NNNN expands against this base IRI).
$namespaces:
  edam: http://edamontology.org/

# Ontology document backing the `edam:` identifiers.
$schemas:
  - http://edamontology.org/EDAM_1.20.owl