Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jpuerto/wdl tests #27

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Dockstore registration file (schema version 1.2). The same pipeline is
# registered twice, once per descriptor language.
version: 1.2
workflows:
  # WDL flavour of the pipeline.
  - subclass: WDL
    # Descriptor paths are given as absolute paths from the repository root
    # (leading '/'), which is the form Dockstore expects.
    primaryDescriptorPath: /pipeline.wdl
    name: salmon-rnaseq-wdl
  # CWL flavour of the pipeline.
  - subclass: CWL
    primaryDescriptorPath: /pipeline.cwl
    name: salmon-rnaseq-cwl
98 changes: 98 additions & 0 deletions pipeline.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
## Use double '#' for workflow-level comments
## This workflow chains Salmon quantification, FastQC, ScanPy, scVelo, SquidPy and QC-metric steps

# WDL version declaration. 'version development' is used here instead of
# 'version 1.0' because the workflow and its steps use the Directory type.
version development

# Import the step definitions that the SalmonRNAseq workflow below calls
import "./steps/salmon-quantification.wdl" as SalmonQuantification
import "./steps/fastqc.wdl" as FastQC
import "./steps/scanpy-analysis.wdl" as ScanPyAnalysis
import "./steps/scvelo-analysis.wdl" as ScVeloAnalysis
import "./steps/squidpy-analysis.wdl" as SquidPyAnalysis
import "./steps/compute-qc-metrics.wdl" as ComputeQCMetrics

# Top-level workflow: runs the SalmonQuantification sub-workflow, FastQC on
# each FASTQ directory, the ScanPy / scVelo / SquidPy analysis tasks, and the
# QC-metric computation, then re-exports their results as workflow outputs.
workflow SalmonRNAseq {
    input {
        # Forwarded as a whole to SalmonQuantification and scattered over,
        # one element at a time, for FastQC.
        Array[Directory] fastq_dir
        # Optional; forwarded to SalmonQuantification and SquidPyAnalysis.
        Directory? img_dir
        # Optional; forwarded to SalmonQuantification only.
        Directory? metadata_dir
        # Passed to every step except FastQC.
        String assay
        # Passed to SalmonQuantification and to FastQC.
        Int threads
        # Optional tuning values forwarded to SalmonQuantification.
        Int? expected_cell_count
        Boolean? keep_all_barcodes
    }

    # Quantification sub-workflow; its count_matrix_h5ad output feeds the
    # ScanPy, scVelo and QC-metric steps below.
    call SalmonQuantification.SalmonQuantification as SalmonQuantificationCall {
        input:
            fastq_dir = fastq_dir,
            img_dir = img_dir,
            metadata_dir = metadata_dir,
            assay = assay,
            threads = threads,
            expected_cell_count = expected_cell_count,
            keep_all_barcodes = keep_all_barcodes
    }

    # One FastQC call per input directory; outside the scatter the call's
    # fastqc_dir output is gathered into an Array[Directory].
    scatter (fastq in fastq_dir) {
        call FastQC.FastQC as FastQCCall {
            input:
                fastq_dir = fastq,
                threads = threads
        }
    }

    call ScanPyAnalysis.ScanPyAnalysis as ScanPyAnalysisCall {
        input:
            assay = assay,
            h5ad_file = SalmonQuantificationCall.count_matrix_h5ad
    }

    call ScVeloAnalysis.ScVeloAnalysis as ScVeloAnalysisCall {
        input:
            spliced_h5ad_file = SalmonQuantificationCall.count_matrix_h5ad,
            assay_name = assay
    }

    # Consumes the filtered ScanPy result rather than the raw count matrix.
    call SquidPyAnalysis.SquidPyAnalysis as SquidPyAnalysisCall {
        input:
            assay = assay,
            h5ad_file = ScanPyAnalysisCall.filtered_data_h5ad,
            img_dir = img_dir
    }

    # Uses both the quantification output (primary) and the ScanPy output
    # (secondary), plus the Salmon output directory.
    call ComputeQCMetrics.ComputeQCMetrics as ComputeQCMetricsCall {
        input:
            assay = assay,
            h5ad_primary = SalmonQuantificationCall.count_matrix_h5ad,
            h5ad_secondary = ScanPyAnalysisCall.filtered_data_h5ad,
            salmon_dir = SalmonQuantificationCall.salmon_output
    }

    output {
        # From the quantification sub-workflow.
        Directory salmon_output = SalmonQuantificationCall.salmon_output
        File count_matrix_h5ad = SalmonQuantificationCall.count_matrix_h5ad
        File? raw_count_matrix = SalmonQuantificationCall.raw_count_matrix
        File genome_build_json = SalmonQuantificationCall.genome_build_json
        # One FastQC output directory per scattered input directory.
        Array[Directory] fastqc_dir = FastQCCall.fastqc_dir
        # From the QC-metric step.
        File scanpy_qc_results = ComputeQCMetricsCall.scanpy_qc_results
        File qc_report = ComputeQCMetricsCall.qc_metrics
        # From the ScanPy step.
        File dispersion_plot = ScanPyAnalysisCall.dispersion_plot
        File umap_plot = ScanPyAnalysisCall.umap_plot
        File umap_density_plot = ScanPyAnalysisCall.umap_density_plot
        File? spatial_plot = ScanPyAnalysisCall.spatial_plot
        File filtered_data_h5ad = ScanPyAnalysisCall.filtered_data_h5ad
        File marker_gene_plot_t_test = ScanPyAnalysisCall.marker_gene_plot_t_test
        File marker_gene_plot_logreg = ScanPyAnalysisCall.marker_gene_plot_logreg
        # From the scVelo step (both optional).
        File? scvelo_annotated_h5ad = ScVeloAnalysisCall.annotated_h5ad_file
        File? scvelo_embedding_grid_plot = ScVeloAnalysisCall.embedding_grid_plot
        # From the SquidPy step (all optional).
        File? squidpy_annotated_h5ad = SquidPyAnalysisCall.squidpy_annotated_h5ad
        File? neighborhood_enrichment_plot = SquidPyAnalysisCall.neighborhood_enrichment_plot
        File? co_occurrence_plot = SquidPyAnalysisCall.co_occurrence_plot
        File? interaction_matrix_plot = SquidPyAnalysisCall.interaction_matrix_plot
        File? centrality_scores_plot = SquidPyAnalysisCall.centrality_scores_plot
        File? ripley_plot = SquidPyAnalysisCall.ripley_plot
        File? squidpy_spatial_plot = SquidPyAnalysisCall.spatial_plot
    }
}
23 changes: 23 additions & 0 deletions steps/compute-qc-metrics.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
version development

# Runs /opt/compute_qc_metrics.py and collects the two result files it is
# expected to leave in the task's working directory.
task ComputeQCMetrics {
    input {
        String assay
        File h5ad_primary
        File h5ad_secondary
        Directory salmon_dir
    }

    # Arguments are positional: assay, primary h5ad, secondary h5ad, Salmon dir.
    command <<<
        /opt/compute_qc_metrics.py ~{assay} ~{h5ad_primary} ~{h5ad_secondary} ~{salmon_dir}
    >>>

    output {
        File scanpy_qc_results = "qc_results.hdf5"
        File qc_metrics = "qc_results.json"
    }

    runtime {
        container: "hubmap/scrna-analysis:latest"
    }
}
20 changes: 20 additions & 0 deletions steps/fastqc.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
version development

# Runs /opt/fastqc_wrapper.py on a single directory and exposes the
# "fastqc_output" directory as the task result.
task FastQC {
    input {
        Directory fastq_dir
        Int threads
    }

    # Both arguments are positional: the directory first, then the thread count.
    command <<<
        /opt/fastqc_wrapper.py ~{fastq_dir} ~{threads}
    >>>

    output {
        Directory fastqc_dir = "fastqc_output"
    }

    runtime {
        container: "hubmap/scrna-analysis:latest"
    }
}
168 changes: 168 additions & 0 deletions steps/salmon-quantification.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
version development

# Quantification sub-workflow. Steps run in dependency order:
# AdjustBarcodes -> TrimReads -> Salmon -> AlevinToAnndata -> AnnotateCells.
workflow SalmonQuantification {
    input {
        Array[Directory] fastq_dir
        Directory? img_dir
        Directory? metadata_dir
        String assay
        Int threads
        Int? expected_cell_count
        Boolean? keep_all_barcodes
    }

    # Step 1: barcode adjustment over all input FASTQ directories.
    call AdjustBarcodes {
        input:
            assay = assay,
            fastq_dir = fastq_dir
    }

    # Step 2: read trimming, given both the adjusted and the original directories.
    call TrimReads {
        input:
            assay = assay,
            adj_fastq_dir = AdjustBarcodes.adj_fastq_dir,
            orig_fastq_dirs = fastq_dir,
            threads = threads
    }

    # Step 3: Salmon on the trimmed reads.
    call Salmon {
        input:
            orig_fastq_dirs = fastq_dir,
            trimmed_fastq_dir = TrimReads.trimmed_fastq_dir,
            assay = assay,
            threads = threads,
            expected_cell_count = expected_cell_count,
            keep_all_barcodes = keep_all_barcodes
    }

    # Step 4: convert the Salmon output directory into h5ad files.
    call AlevinToAnndata {
        input:
            assay = assay,
            alevin_dir = Salmon.output_dir
    }

    # Step 5: annotation, using the optional image/metadata inputs and the
    # optional metadata.json produced by AdjustBarcodes.
    call AnnotateCells {
        input:
            assay = assay,
            orig_fastq_dirs = fastq_dir,
            h5ad_file = AlevinToAnndata.expr_h5ad,
            img_dir = img_dir,
            metadata_dir = metadata_dir,
            metadata_json = AdjustBarcodes.metadata_json
    }

    output {
        Directory salmon_output = Salmon.output_dir
        File count_matrix_h5ad = AnnotateCells.annotated_h5ad_file
        File? raw_count_matrix = AlevinToAnndata.raw_expr_h5ad
        File genome_build_json = AlevinToAnndata.genome_build_json
    }
}

# Runs /opt/adjust_barcodes.py over every input FASTQ directory.
task AdjustBarcodes {
    input {
        String assay
        Array[Directory] fastq_dir
    }

    # The directories are passed as separate space-delimited arguments after
    # the literal "directory" token.
    command <<<
        /opt/adjust_barcodes.py ~{assay} directory ~{sep(" ", fastq_dir)}
    >>>

    output {
        Directory adj_fastq_dir = "adj_fastq"
        # Optional: the task still succeeds when metadata.json is not produced.
        File? metadata_json = "metadata.json"
    }

    runtime {
        container: "hubmap/scrna-barcode-adj:latest"
    }
}

# Runs /opt/trim_reads.py on the barcode-adjusted FASTQ directory, also passing
# the original FASTQ directories, and exposes the "trimmed" directory.
task TrimReads {
    input {
        String assay
        Directory adj_fastq_dir
        Array[Directory] orig_fastq_dirs
        Int threads
    }

    # `threads` is declared and supplied by the calling workflow, so it is
    # forwarded to the script here rather than being silently dropped.
    # NOTE(review): the `--threads` spelling mirrors the Salmon task's
    # invocation — confirm trim_reads.py accepts the same option.
    command <<<
        /opt/trim_reads.py ~{assay} ~{adj_fastq_dir} ~{sep(" ", orig_fastq_dirs)} --threads ~{threads}
    >>>

    output {
        Directory trimmed_fastq_dir = "trimmed"
    }

    runtime {
        container: "hubmap/scrna-trim-reads:latest"
    }
}

# Runs /opt/salmon_wrapper.py on the trimmed reads and exposes the
# "salmon_out" directory.
task Salmon {
    input {
        String assay
        Directory trimmed_fastq_dir
        Array[Directory] orig_fastq_dirs
        Int threads
        Int? expected_cell_count
        Boolean? keep_all_barcodes
    }

    # An unset keep_all_barcodes is treated as false.
    Boolean keep_barcodes = select_first([keep_all_barcodes, false])

    # --expected-cell-count: concatenation with an undefined optional inside a
    #   placeholder yields an empty string, so the option vanishes when unset.
    # --keep-all-barcodes: emitted as a bare switch, and only when true. String
    #   + Boolean is not a defined WDL operation, and appending the value would
    #   also have emitted the switch for an explicit `false`.
    # NOTE(review): assumes salmon_wrapper.py treats --keep-all-barcodes as a
    #   value-less flag — confirm against the script.
    command <<<
        /opt/salmon_wrapper.py ~{assay} ~{trimmed_fastq_dir} ~{sep(" ", orig_fastq_dirs)} --threads ~{threads} ~{"--expected-cell-count " + expected_cell_count} ~{if keep_barcodes then "--keep-all-barcodes" else ""}
    >>>

    output {
        Directory output_dir = "salmon_out"
    }

    runtime {
        container: "hubmap/salmon-grch38:latest"
    }
}

# Runs /opt/alevin_to_anndata.py on a Salmon/alevin output directory and
# collects the h5ad and genome-build files it writes.
task AlevinToAnndata {
    input {
        String assay
        Directory alevin_dir
    }

    command <<<
        /opt/alevin_to_anndata.py ~{assay} ~{alevin_dir}
    >>>

    output {
        # Optional: the task still succeeds when raw_expr.h5ad is not produced.
        File? raw_expr_h5ad = "raw_expr.h5ad"
        File expr_h5ad = "expr.h5ad"
        File genome_build_json = "genome_build.json"
    }

    runtime {
        container: "hubmap/scrna-analysis:latest"
    }
}

# Runs /opt/annotate_cells.py on an h5ad file and exposes the resulting
# "expr.h5ad" as the annotated output.
task AnnotateCells {
    input {
        String assay
        File h5ad_file
        Array[Directory] orig_fastq_dirs
        Directory? img_dir
        Directory? metadata_dir
        File? metadata_json
    }

    # Each optional argument uses the placeholder concatenation idiom: when the
    # value is undefined the whole "--flag value" expression evaluates to an
    # empty string, so the flag is omitted from the command line.
    command <<<
        /opt/annotate_cells.py ~{assay} ~{h5ad_file} ~{sep(" ", orig_fastq_dirs)} ~{"--img_dir " + img_dir} ~{"--metadata_dir " + metadata_dir} ~{"--metadata_json " + metadata_json}
    >>>

    output {
        File annotated_h5ad_file = "expr.h5ad"
    }

    runtime {
        container: "hubmap/scrna-analysis:latest"
    }
}
26 changes: 26 additions & 0 deletions steps/scanpy-analysis.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
version development

# Runs /opt/scanpy_entry_point.py on an h5ad file and collects the analysis
# result plus the PDF plots it writes.
task ScanPyAnalysis {
    input {
        String assay
        File h5ad_file
    }

    command <<<
        /opt/scanpy_entry_point.py ~{assay} ~{h5ad_file}
    >>>

    output {
        File filtered_data_h5ad = "secondary_analysis.h5ad"
        File dispersion_plot = "dispersion_plot.pdf"
        File umap_plot = "umap_by_leiden_cluster.pdf"
        # Optional: the task still succeeds when this plot is not produced.
        File? spatial_plot = "spatial_pos_by_leiden_cluster.pdf"
        File umap_density_plot = "umap_embedding_density.pdf"
        File marker_gene_plot_t_test = "marker_genes_by_cluster_t_test.pdf"
        File marker_gene_plot_logreg = "marker_genes_by_cluster_logreg.pdf"
    }

    runtime {
        container: "hubmap/scrna-analysis:latest"
    }
}
21 changes: 21 additions & 0 deletions steps/scvelo-analysis.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
version development

# Runs /opt/scvelo_analysis.py. Both outputs are optional, so the task still
# succeeds when the script writes neither file.
task ScVeloAnalysis {
    input {
        File spliced_h5ad_file
        String assay_name
    }

    # Argument order differs from the other analysis tasks: the h5ad file
    # comes first, then the assay name.
    command <<<
        /opt/scvelo_analysis.py ~{spliced_h5ad_file} ~{assay_name}
    >>>

    output {
        File? annotated_h5ad_file = "scvelo_annotated.h5ad"
        File? embedding_grid_plot = "scvelo_embedding_grid.pdf"
    }

    runtime {
        container: "hubmap/scrna-analysis:latest"
    }
}
Loading