Skip to content

Commit

Permalink
Add support for “self-identification”
Browse files Browse the repository at this point in the history
This fixes #213.
  • Loading branch information
smondet committed Apr 13, 2016
1 parent 3cfcb3d commit ebb0d9d
Show file tree
Hide file tree
Showing 16 changed files with 178 additions and 130 deletions.
6 changes: 5 additions & 1 deletion src/bfx_tools/bwa.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ let index
]
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors:1 ~name
~self_ids:["bwa"; "index"]
Program.(
Machine.Tool.(init bwa_tool)
&& shf "bwa index %s"
Expand Down Expand Up @@ -98,6 +99,7 @@ let mem_align_to_sam
:: [])
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors ~name
~self_ids:["bwa"; "mem"]
Program.(
Machine.Tool.(init bwa_tool)
&& in_work_dir
Expand Down Expand Up @@ -166,6 +168,7 @@ let align_to_sam
]
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors ~name
~self_ids:["bwa"; "aln"]
Program.(
Machine.Tool.(init bwa_tool)
&& in_work_dir
Expand Down Expand Up @@ -219,6 +222,7 @@ let align_to_sam
(single_file result ~host:Machine.(as_host run_with))
~name ~edges
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors:1 ~name program)
~make:(Machine.run_big_program run_with ~processors:1 ~name program
~self_ids:["bwa"; "sampe"])
in
sam
1 change: 1 addition & 0 deletions src/bfx_tools/cufflinks.ml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ let run ~reference_build
Samtools.sort_bam_if_necessary ~run_with ~processors ~by:`Coordinate bam in
let make =
Machine.run_big_program run_with ~name ~processors
~self_ids:["cufflinks"]
Program.(
Machine.Tool.init cufflinks_tool
&& shf "mkdir -p %s" output_dir
Expand Down
6 changes: 5 additions & 1 deletion src/bfx_tools/gatk.ml
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ let indel_realigner :
in
let intervals_option = Region.to_gatk_option on_region in
Machine.run_big_program run_with ~name ~processors
~self_ids:["gatk"; "indel-realigner"]
Program.(
Machine.Tool.(init gatk)
&& shf "cd %s" (Filename.quote run_directory)
Expand Down Expand Up @@ -423,6 +424,7 @@ let base_quality_score_recalibrator
in
let make =
Machine.run_big_program run_with ~name ~processors
~self_ids:["gatk"; "bqsr"]
Program.(
Machine.Tool.(init gatk)
&& call_gatk ~analysis:"BaseRecalibrator" ([
Expand Down Expand Up @@ -475,6 +477,7 @@ let haplotype_caller
let name = sprintf "%s" (Filename.basename output_vcf) in
let make =
Machine.run_big_program run_with ~name
~self_ids:["gatk"; "haplotype-caller"]
Program.(
Machine.Tool.(init gatk)
&& shf "mkdir -p %s" run_path
Expand Down Expand Up @@ -552,7 +555,8 @@ let mutect2
let run_caller =
let name = sprintf "%s" (Filename.basename output_vcf) in
let make =
Machine.run_program run_with ~name
Machine.run_big_program run_with ~name
~self_ids:["gatk"; "mutect2"]
Program.(
Machine.Tool.(init gatk)
&& shf "mkdir -p %s" run_path
Expand Down
58 changes: 30 additions & 28 deletions src/bfx_tools/hisat.ml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ module Configuration = struct
let to_json {name; version}: Yojson.Basic.json =
`Assoc [
"name", `String name;
"version",
match version with
"version",
match version with
|`V_0_1_6_beta -> `String "V_0_1_6_beta"
|`V_2_0_2_beta -> `String "V_2_0_2_beta"
;
Expand All @@ -22,28 +22,28 @@ module Configuration = struct
end

let index
~reference_build
~index_prefix
~configuration
~(run_with : Machine.t) =
~reference_build
~index_prefix
~configuration
~(run_with : Machine.t) =
let open KEDSL in
let reference_fasta =
Machine.get_reference_genome run_with reference_build
|> Reference_genome.fasta in
|> Reference_genome.fasta in
let result_dir = Filename.dirname index_prefix in
let version = configuration.Configuration.version in
let hisat_tool = Machine.get_tool run_with (`Hisat version) in
let build_binary =
let build_binary =
match version with
| `V_0_1_6_beta -> "hisat-build"
| `V_2_0_2_beta -> "hisat2-build"
in
let name =
let name =
sprintf "%s-%s" build_binary (Filename.basename reference_fasta#product#path) in
let first_index_file =
let first_index_file =
match version with
| `V_0_1_6_beta -> sprintf "%s.1.bt2" index_prefix
| `V_2_0_2_beta -> sprintf "%s.1.ht2" index_prefix
| `V_0_1_6_beta -> sprintf "%s.1.bt2" index_prefix
| `V_2_0_2_beta -> sprintf "%s.1.ht2" index_prefix
in
workflow_node ~name
(single_file ~host:(Machine.(as_host run_with)) first_index_file)
Expand All @@ -54,16 +54,17 @@ let index
]
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~name
Program.(
Machine.Tool.(init hisat_tool)
&& shf "mkdir %s" result_dir
&& shf "%s %s %s"
build_binary
reference_fasta#product#path
index_prefix
))
~self_ids:["hisat"; "index"]
Program.(
Machine.Tool.(init hisat_tool)
&& shf "mkdir %s" result_dir
&& shf "%s %s %s"
build_binary
reference_fasta#product#path
index_prefix
))

let align
let align
~reference_build
~processors
~configuration
Expand All @@ -77,7 +78,7 @@ let align
|> Reference_genome.fasta in
let reference_dir = (Filename.dirname reference_fasta#product#path) in
let version = configuration.Configuration.version in
let hisat_binary =
let hisat_binary =
match version with
| `V_0_1_6_beta -> "hisat"
| `V_2_0_2_beta -> "hisat2"
Expand All @@ -91,7 +92,7 @@ let align
let result = sprintf "%s.sam" result_prefix in
let r1_path, r2_path_opt = fastq#product#paths in
let name = sprintf "%s-rna-align-%s" hisat_binary (Filename.basename r1_path) in
let hisat_base_command = sprintf
let hisat_base_command = sprintf
"%s \
-p %d \
-x %s \
Expand All @@ -101,10 +102,10 @@ let align
(Filename.quote index_prefix)
(Filename.quote result)
in
let base_hisat_target ~hisat_command =
let base_hisat_target ~hisat_command =
workflow_node ~name
(single_file
~host:(Machine.(as_host run_with))
~host:(Machine.(as_host run_with))
result)
~edges:[
on_failure_activate (Remove.file ~run_with result);
Expand All @@ -115,22 +116,23 @@ let align
]
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors ~name
~self_ids:["hisat"; "align"]
Program.(
Machine.Tool.(init hisat_tool)
&& in_work_dir
&& sh hisat_command
&& sh hisat_command
))
in
match r2_path_opt with
| Some read2 ->
| Some read2 ->
let hisat_command =
String.concat ~sep:" " [
hisat_base_command;
"-1"; (Filename.quote r1_path);
"-2"; (Filename.quote read2);
] in
base_hisat_target ~hisat_command
| None ->
| None ->
let hisat_command = String.concat ~sep:" " [
hisat_base_command;
"-U"; (Filename.quote r1_path);
Expand Down
20 changes: 11 additions & 9 deletions src/bfx_tools/kallisto.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ open Common

(** Create a kallisto specific index of the transcriptome (cDNA) *)
let index
~reference_build
~(run_with : Machine.t) =
~reference_build
~(run_with : Machine.t) =
let open KEDSL in
let reference_transcriptome =
Machine.get_reference_genome run_with reference_build
|> Reference_genome.cdna_exn in
|> Reference_genome.cdna_exn in
let kallisto_tool = Machine.get_tool run_with Machine.Tool.Default.kallisto in
let name =
sprintf "kallisto-index-%s" (Filename.basename reference_transcriptome#product#path) in
Expand All @@ -24,12 +24,13 @@ let index
depends_on Machine.Tool.(ensure kallisto_tool);
]
~make:(Machine.run_big_program run_with ~name
Program.(
Machine.Tool.(init kallisto_tool)
&& shf "kallisto index -i %s %s"
result
reference_transcriptome#product#path
))
~self_ids:["kallisto"; "index"]
Program.(
Machine.Tool.(init kallisto_tool)
&& shf "kallisto index -i %s %s"
result
reference_transcriptome#product#path
))

(** Quantify transcript abundance from RNA fastqs, results in abundance.tsv file *)
let run
Expand Down Expand Up @@ -69,6 +70,7 @@ let run
in
let make =
Machine.run_big_program run_with ~name ~processors
~self_ids:["kallisto"; "quant"]
Program.(
Machine.Tool.init kallisto_tool
&& sh kallisto_quant
Expand Down
36 changes: 19 additions & 17 deletions src/bfx_tools/mosaik.ml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ open Common
module Remove = Workflow_utilities.Remove

let index
~reference_build
~processors
~(run_with : Machine.t) =
~reference_build
~processors
~(run_with : Machine.t) =
let open KEDSL in
let reference_fasta =
Machine.get_reference_genome run_with reference_build
|> Reference_genome.fasta in
|> Reference_genome.fasta in
let mosaik_tool = Machine.get_tool run_with Machine.Tool.Default.mosaik in
let name =
sprintf "mosaik-build-%s" (Filename.basename reference_fasta#product#path) in
Expand All @@ -27,19 +27,20 @@ let index
]
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors ~name
Program.(
Machine.Tool.(init mosaik_tool)
&& shf "mkdir -p %s" mosaik_tmp_dir
&& shf "export MOSAIK_TMP=%s" mosaik_tmp_dir
(* Command to build basic MOSAIK reference file *)
&& shf "MosaikBuild -fr %s -oa %s"
reference_fasta#product#path
index_result
(* Command to build MOSAIK index file *)
&& shf "MosaikJump -ia %s -hs 15 -out %s"
index_result
jump_file_result
))
~self_ids:["mosaik"; "index"]
Program.(
Machine.Tool.(init mosaik_tool)
&& shf "mkdir -p %s" mosaik_tmp_dir
&& shf "export MOSAIK_TMP=%s" mosaik_tmp_dir
(* Command to build basic MOSAIK reference file *)
&& shf "MosaikBuild -fr %s -oa %s"
reference_fasta#product#path
index_result
(* Command to build MOSAIK index file *)
&& shf "MosaikJump -ia %s -hs 15 -out %s"
index_result
jump_file_result
))



Expand Down Expand Up @@ -110,6 +111,7 @@ let align
]
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors ~name
~self_ids:["mosaik"; "align"]
Program.(
Machine.Tool.(init mosaik_tool)
&& in_work_dir
Expand Down
4 changes: 3 additions & 1 deletion src/bfx_tools/mutect.ml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ let run
Option.value_map ~default:"" dbsnp ~f:(fun node ->
sprintf "--dbsnp %s" (Filename.quote node#product#path)) in
let make =
Machine.run_big_program run_with ~name ~processors:2 Program.(
Machine.run_big_program run_with ~name ~processors:2
~self_ids:["mutect"]
Program.(
Machine.Tool.(init mutect)
&& shf "mkdir -p %s" run_path
&& shf "cd %s" run_path
Expand Down
4 changes: 3 additions & 1 deletion src/bfx_tools/optitype.ml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ let hla_type ~work_dir ~run_with ~r1 ?r2 ~run_name nt =
let r2pt_opt = Option.map ~f:(fun o -> Filename.quote o#product#path) r2 in
let name = sprintf "optitype-%s" run_name in
let make =
Machine.run_big_program run_with ~name KEDSL.Program.(
Machine.run_big_program run_with ~name
~self_ids:["optitype"]
KEDSL.Program.(
Machine.Tool.init tool
&& exec ["mkdir"; "-p"; work_dir]
&& exec ["cd"; work_dir]
Expand Down
16 changes: 12 additions & 4 deletions src/bfx_tools/picard.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ let create_dict ~(run_with:Machine.t) fasta =
shf "java -jar $PICARD_JAR CreateSequenceDictionary R= %s O= %s"
(Filename.quote src) (Filename.quote dest)) in
let name = sprintf "picard-create-dict-%s" Filename.(basename src) in
let make = Machine.run_program run_with program ~name in
let make =
Machine.run_stream_processor run_with program ~name
~self_ids:["picard"; "create-dict"] in
let host = Machine.(as_host run_with) in
workflow_node (single_file dest ~host) ~name ~make
~edges:[
Expand Down Expand Up @@ -59,7 +61,9 @@ let sort_vcf ~(run_with:Machine.t) ?(sequence_dict) input_vcf =
(Filename.quote src) (Filename.quote dest)))
in
let host = Machine.(as_host run_with) in
let make = Machine.run_program run_with program ~name in
let make =
Machine.run_stream_processor
run_with program ~name ~self_ids:["picard"; "sort-vcf"] in
workflow_node (single_file dest ~host) ~name ~make
~edges:([
depends_on input_vcf; depends_on Machine.Tool.(ensure picard);
Expand Down Expand Up @@ -125,7 +129,9 @@ let mark_duplicates
metrics_path) in
let name =
sprintf "picard-markdups-%s" Filename.(basename input_bam#product#path) in
let make = Machine.run_big_program ~name run_with program in
let make =
Machine.run_big_program ~name run_with program
~self_ids:["picard"; "mark-duplicates"] in
let product = transform_bam input_bam#product output_bam_path in
workflow_node product
~name ~make
Expand Down Expand Up @@ -165,7 +171,9 @@ let bam_to_fastq
) in
let name =
sprintf "picard-bam2fastq-%s" Filename.(basename input_bam#product#path) in
let make = Machine.run_program ~name run_with program in
let make =
Machine.run_big_program ~name run_with program
~self_ids:["picard"; "bam-to-fastq"] in
let edges =
(fun list ->
match r2opt with
Expand Down
1 change: 1 addition & 0 deletions src/bfx_tools/somaticsniper.ml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ let run
Samtools.sort_bam_if_necessary ~run_with ~by:`Coordinate tumor in
let make =
Machine.run_big_program run_with
~self_ids:["somaticsniper"]
~name ~processors:1 Program.(
Machine.Tool.init sniper
&& shf "mkdir -p %s" run_path
Expand Down
Loading

0 comments on commit ebb0d9d

Please sign in to comment.