From bf5c9e88b9abd1d1ff12340cffdb53afc9d89a0b Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 12 Jul 2024 19:16:38 +0100 Subject: [PATCH 1/7] add bwa-mem2 to Dockerfile --- Dockerfile | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Dockerfile b/Dockerfile index c8d2dc2c..8fa82ed7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,7 @@ ARG BASE_IMAGE=ubuntu:22.04 ARG BAMBI_VERSION="0.17.1" ARG BIOBAMBAM2_VERSION="2.0.185-release-20221211202123" ARG BWA_VERSION="0.7.18" +ARG BWA_MEM2_VERSION="2.2.1" ARG DEFLATE_VERSION="1.20" ARG HTSLIB_VERSION="1.20" ARG IO_LIB_VERSION="1.15.0" @@ -127,6 +128,17 @@ RUN curl -sSL -O "https://github.com/lh3/bwa/archive/refs/tags/v${BWA_VERSION}.t chmod +x /usr/local/bin/bwa && \ ln -s /usr/local/bin/bwa /usr/local/bin/bwa0_6 +ARG BWA_MEM2_VERSION +RUN curl -sSL -O "https://github.com/bwa-mem2/bwa-mem2/releases/download/v${BWA_MEM2_VERSION}/bwa-mem2-${BWA_MEM2_VERSION}_x64-linux.tar.bz2" && \ + tar xfj ./bwa-mem2-${BWA_MEM2_VERSION}_x64-linux.tar.bz2 && \ + cd ./bwa-mem2-${BWA_MEM2_VERSION}_x64-linux && \ + cp ./bwa-mem2 /usr/local/bin/ && \ + cp ./bwa-mem2.avx /usr/local/bin/ && \ + cp ./bwa-mem2.avx2 /usr/local/bin/ && \ + cp ./bwa-mem2.avx512bw /usr/local/bin/ && \ + cp ./bwa-mem2.sse41 /usr/local/bin/ && \ + cp ./bwa-mem2.sse42 /usr/local/bin/ + ARG BAMBI_VERSION RUN git clone --single-branch --branch="$BAMBI_VERSION" --depth=1 "https://github.com/wtsi-npg/bambi.git" && \ cd bambi && \ From 99d8c3772e2e853cafd9692eeb64f38eba13def0 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 19 Jul 2024 11:48:22 +0100 Subject: [PATCH 2/7] changes to templates and ctfp to do alternate stage2 processing --- bin/vtfp.pl | 34 +++++++-- .../vtlib/alignment_wtsi_stage2_template.json | 31 ++++---- data/vtlib/crammerge.json | 37 ++++++++++ data/vtlib/elembio_nanoseq_s2_pi.json | 71 +++++++++++++++++++ data/vtlib/fastq_s2_pi.json | 64 +++++++++++++++++ data/vtlib/subsample.json | 25 +++---- data/vtlib/subsample_spec_frac.json 
| 31 ++++++++ data/vtlib/subsample_tmf.json | 37 ++++++++++ 8 files changed, 291 insertions(+), 39 deletions(-) create mode 100644 data/vtlib/crammerge.json create mode 100644 data/vtlib/elembio_nanoseq_s2_pi.json create mode 100644 data/vtlib/fastq_s2_pi.json create mode 100644 data/vtlib/subsample_spec_frac.json create mode 100644 data/vtlib/subsample_tmf.json diff --git a/bin/vtfp.pl b/bin/vtfp.pl index d7c7ea44..914e8d76 100755 --- a/bin/vtfp.pl +++ b/bin/vtfp.pl @@ -451,6 +451,12 @@ sub apply_subst { $ewi->{removelabel}->(); } + if($cfg->{subgraph_io}) { + $ewi->{addlabel}->(q{subgraph_io}); + $cfg->{subgraph_io} = subst_walk($cfg->{subgraph_io}, $params, $ewi); + $ewi->{removelabel}->(); + } + return; } @@ -1412,9 +1418,9 @@ sub validate_splice_candidates { } } - # all edge termini must be unique (over replacement and pruning edges) except for STDIN/STDOUT + # all edge termini must be unique (over replacement edges) except for STDIN/STDOUT my %endpoints; - for my $edge (@{$splice_candidates->{replacement_edges}}, @{$prune_edges}) { + for my $edge (@{$splice_candidates->{replacement_edges}}) { my $from_end = $edge->{from}; if($from_end and $from_end !~ /:/) { $from_end .= q[:STDOUT] }; @@ -1472,15 +1478,33 @@ sub final_splice { # add new edges push @{$flat_graph->{edges}}, @{$splice_candidates->{replacement_edges}}; - # remove pruned ports - prune edges are not required to be two-ended; just disregard undefined to/from attributes + # remove pruned ports - prune edges are not required to be two-ended; just disregard undefined to/from attributes; only remove ports + # that do not appear in splice edges (aka replacement edges) for my $prune_edge (@{$splice_candidates->{prune_edges}}) { - if($prune_edge->{from}) { remove_port($prune_edge->{from}, $SRC, $flat_graph); } - if($prune_edge->{to}) { remove_port($prune_edge->{to}, $DST, $flat_graph); } + if($prune_edge->{from} and not _in_replacement_edges($prune_edge->{from}, $splice_candidates, $SRC)) { 
################################################################################################
# _in_replacement_edges:
#  Report whether a port named by a prune edge is also an endpoint of one of the splice
#  (replacement) edges. final_splice uses this to avoid removing a port that a replacement
#  edge has just been attached to.
#
#  Parameters:
#    $port_spec         - port to look for: "node:port", or a bare node id meaning the node's
#                         standard stream
#    $splice_candidates - hash ref; only its replacement_edges array ref is read here
#    $type              - $SRC to compare against the "from" end of each replacement edge;
#                         any other value compares against the "to" end
#
#  Returns 1 when a replacement edge end matches $port_spec, otherwise 0.
################################################################################################
sub _in_replacement_edges {
    my ($port_spec, $splice_candidates, $type) = @_;

    my $is_src    = ($type == $SRC);
    my $direction = $is_src ? q[from] : q[to];

    # A bare node id refers to the node's standard stream. Data leaves a source node on
    # STDOUT and enters a destination node on STDIN; this is the same default that
    # validate_splice_candidates applies to bare "from" ends (the previous code had the
    # two defaults swapped).
    my $std_port = $is_src ? q[STDOUT] : q[STDIN];

    # nothing to look for
    if(not $port_spec) { return 0; }

    # normalise the sought port the same way as the edge ends, so that a bare node id
    # and its explicit standard port compare equal
    if($port_spec !~ /:/) { $port_spec .= qq[:$std_port]; }

    for my $edge (@{$splice_candidates->{replacement_edges}}) {
        my $end = $edge->{$direction};

        # skip an absent end instead of comparing undef with eq (which warns)
        if(not $end) { next; }

        if($end !~ /:/) { $end .= qq[:$std_port]; }

        if($end eq $port_spec) { return 1; }
    }

    return 0;
}
"node_prefix":"ppi_", + "description":"subgraph to preprocess inputs. Default: merge individual cram files from a sample into one bam file" }, { "id":"spatial_filter", @@ -264,7 +259,7 @@ } ], "edges":[ - { "id":"src_to_bc2", "from":"crammerge", "to":{"subst":"post_cm","required":true} }, + { "id":"src_to_bc2", "from":"preprocess_inputs", "to":{"subst":"post_cm","required":true} }, {"select":"spatial_filter_switch", "required":true, "select_range":[1], "default":"on", "allow_unspec_keys":true, "cases":{ "on": [ diff --git a/data/vtlib/crammerge.json b/data/vtlib/crammerge.json new file mode 100644 index 00000000..de833348 --- /dev/null +++ b/data/vtlib/crammerge.json @@ -0,0 +1,37 @@ +{ +"version":"2.0", +"description":"run bwa mem to to align input bam to supplied reference genome", +"subgraph_io":{ + "ports":{ + "inputs":{ + }, + "outputs":{ + "_stdout_":"crammerge" + } + } +}, +"subst_params":[], +"nodes":[ + { + "id":"crammerge", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": true, + "cmd": [ + "samtools", + "merge", + "-n", + "-O", "BAM", + "-l", "0", + {"select":"input_format", "default":"cram", "select_range":[1], "cases":{ + "cram":["--input-fmt-option", "no_ref=1"], + "bam":["--input-fmt", "bam"] + }}, + "-", + {"subst":"incrams", "required":true} + ], + "description":"merge individual cram files from a sample into one bam file" + } +], +"edges":[] +} diff --git a/data/vtlib/elembio_nanoseq_s2_pi.json b/data/vtlib/elembio_nanoseq_s2_pi.json new file mode 100644 index 00000000..a96c2b4a --- /dev/null +++ b/data/vtlib/elembio_nanoseq_s2_pi.json @@ -0,0 +1,71 @@ +{ +"version":"2.0", +"description":"alternate pre-processing method for stage2 inputs (Elembio NanoSeq)", +"subgraph_io":{ + "ports":{ + "inputs":{}, + "outputs":{ "_stdout_":"ubam_convert" } + } +}, +"nodes":[ + { + "id":"import", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": true, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "import", + "-R", 
{"subst":"elembio_nanoseq_RG","required":true, "comment":"readgroup"}, + "-1", {"subst":"elembio_nanoseq_fq1","required":true, "comment":"FASTQ read 1"}, + "-2", {"subst":"elembio_nanoseq_fq2","required":true, "comment":"FASTQ read 2"}, + "-i", + "-u", + "-O", "bam" + ] + }, + { + "id":"read2tags", + "type":{ + "select":"pp_read2tags", + "required":true, + "select_range":[1], + "default":"off", + "cases":{ + "on":"EXEC", + "off":"INACTIVE" + } + }, + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ + {"subst":"bambi_executable", "required":true, "ifnull":"bambi"}, "read2tags", + "--tags", "rb,mb,br,rb,mb,br", + "--qtags", "rq,mq,bq,rq,mq,bq", + "--positions", "1:1:1:3,1:2:1:3,1:1:4:7,2:2:1:3,2:1:1:3,2:2:4:7", + "--output-fmt", "sam" + ] + }, + { + "id":"add_nanoseq_mate_tags", + "type":"INACTIVE", + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ "add_NanoSeq_mate_tags.pl" ] + }, + { + "id":"ubam_convert", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "view", + "-u", + "-" + ] + } +], +"edges":[ + { "id":"import_to_read2tags", "from":"import", "to":"read2tags" }, + { "id":"read2tags_to_add_nanoseq_mate_tags", "from":"read2tags", "to":"ubam_convert" } +] +} diff --git a/data/vtlib/fastq_s2_pi.json b/data/vtlib/fastq_s2_pi.json new file mode 100644 index 00000000..a7195742 --- /dev/null +++ b/data/vtlib/fastq_s2_pi.json @@ -0,0 +1,64 @@ +{ +"version":"2.0", +"description":"alternate pre-processing method for stage2 inputs accepting FASTQ input (for e.g. 
Elembio NanoSeq)", +"subgraph_io":{ + "ports":{ + "inputs":{}, + "outputs":{ "_stdout_": + {"select":"pp_read2tags", "required":true, "default":"off", + "cases":{ + "off": "import", + "on": "read2tags" + } + } + } + } +}, +"nodes":[ + { + "id":"import", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": true, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "import", + "-R", {"subst":"fastq_s2_pi_RG_ID","required":true, "comment":"readgroup"}, + "-1", {"subst":"fastq_s2_pi_fq1","required":true, "comment":"FASTQ read 1"}, + "-2", {"subst":"fastq_s2_pi_fq2","required":true, "comment":"FASTQ read 2"}, + "-i", + "-u", + "-O", "bam" + ] + }, + { + "id":"read2tags", + "type":{ + "select":"pp_read2tags", + "required":true, + "select_range":[1], + "default":"off", + "cases":{ + "on":"EXEC", + "off":"INACTIVE" + } + }, + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ + {"subst":"bambi_executable", "required":true, "ifnull":"bambi"}, "read2tags", + "--tags", "rb,mb,br,rb,mb,br", + "--qtags", "rq,mq,bq,rq,mq,bq", + "--positions", "1:1:1:3,1:2:1:3,1:1:4:7,2:2:1:3,2:1:1:3,2:2:4:7", + "--compression-level", 0, + "--output-fmt", "bam" + ] + } +], +"edges":[ + {"select":"pp_read2tags", "required":true, "default":"off", "cases":{ + "off": [], + "on": [ { "id":"import_to_read2tags", "from":"import", "to":"read2tags" }] + } + } +] +} diff --git a/data/vtlib/subsample.json b/data/vtlib/subsample.json index 271cf6ca..b6f1ed92 100644 --- a/data/vtlib/subsample.json +++ b/data/vtlib/subsample.json @@ -36,22 +36,15 @@ }, { "id":"subsample", - "type":"EXEC", - "subtype":"STRINGIFY", - "use_STDIN": true, - "use_STDOUT": true, - "cmd":[ - "bash -c '", - {"subst_constructor":{"vals":["tmfs=\"", {"subst":"tag_metrics_files", "required":true}, "\""],"postproc":{"op":"concat","pad":""}}}, "; if [ ! 
-z \"${tmfs}\" ]; then for tag_metrics_file in ${tmfs}; do reads_count=`jq", {"subst":"jqkey", "ifnull":{"subst_constructor":{"vals":["'\"'\"'.reads_count.\"", {"subst":"s2_tag_index", "required":true}, "\"'\"'\"'"],"postproc":{"op":"concat","pad":""}}}}, "${tag_metrics_file}`; reads_count=`echo ${reads_count} | tr -cd [:digit:]`; reads_count_total=$((${reads_count_total}+${reads_count})); done; if [[ $reads_count_total -eq 0 ]]; then reads_count_total=1; fi; frac=`echo \"10000/${reads_count_total}\" | bc -l`; fi;", - "if [ ! -z $frac ]; then", - "samtools", - "view", - "-s", {"subst":"seed_frac", "required":true, "ifnull": {"subst_constructor":{"vals":[ {"subst":"subsample_seed", "ifnull":{"subst":"s2_id_run", "required":true}}, "${frac}" ],"postproc":{"op":"concat","pad":""}}}}, - "-b", - "-", - ";", - "else >&2 printf \"No tag metrics, no subsample\"; fi;'" - ] + "type":"VTFILE", + "name":{"subst":"s2_subsample_method", "required":true, + "ifnull":{ + "select":"s2_subsample_method_switch", "default":"tmf","select_range":[1], + "cases":{ + "tmf":"subsample_tmf.json", + "spec_frac":"subsample_spec_frac.json"} + }}, + "node_prefix":"ssm_" }, { "id":"bamtofastq_ss", diff --git a/data/vtlib/subsample_spec_frac.json b/data/vtlib/subsample_spec_frac.json new file mode 100644 index 00000000..838a763a --- /dev/null +++ b/data/vtlib/subsample_spec_frac.json @@ -0,0 +1,31 @@ +{ +"version":"2.0", +"description":"produce 10k subsample fastq files", +"subgraph_io":{ + "ports":{ + "inputs":{ + "_stdin_":"subsample" + }, + "outputs":{ + "_stdout_":"subsample" + } + } +}, +"subst_params":[], +"nodes":[ + { + "id":"subsample", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":[ + "samtools", + "view", + "-s", {"subst":"seed_frac", "required":true, "ifnull": {"subst_constructor":{"vals":[ {"subst":"subsample_seed", "ifnull":{"subst":"s2_id_run", "required":true}}, {"subst":"ss_frac", "required":true}],"postproc":{"op":"concat","pad":"."}}}}, + "-b", + "-" + ] + } 
+], +"edges":[] +} diff --git a/data/vtlib/subsample_tmf.json b/data/vtlib/subsample_tmf.json new file mode 100644 index 00000000..5a18bf92 --- /dev/null +++ b/data/vtlib/subsample_tmf.json @@ -0,0 +1,37 @@ +{ +"version":"2.0", +"description":"produce 10k subsample fastq files", +"subgraph_io":{ + "ports":{ + "inputs":{ + "_stdin_":"subsample" + }, + "outputs":{ + "_stdout_":"subsample" + } + } +}, +"subst_params":[], +"nodes":[ + { + "id":"subsample", + "type":"EXEC", + "subtype":"STRINGIFY", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":[ + "bash -c '", + {"subst_constructor":{"vals":["tmfs=\"", {"subst":"tag_metrics_files", "required":true}, "\""],"postproc":{"op":"concat","pad":""}}}, "; if [ ! -z \"${tmfs}\" ]; then for tag_metrics_file in ${tmfs}; do reads_count=`jq", {"subst":"jqkey", "ifnull":{"subst_constructor":{"vals":["'\"'\"'.reads_count.\"", {"subst":"s2_tag_index", "required":true}, "\"'\"'\"'"],"postproc":{"op":"concat","pad":""}}}}, "${tag_metrics_file}`; reads_count=`echo ${reads_count} | tr -cd [:digit:]`; reads_count_total=$((${reads_count_total}+${reads_count})); done; if [[ $reads_count_total -eq 0 ]]; then reads_count_total=1; fi; frac=`echo \"10000/${reads_count_total}\" | bc -l`; fi;", + "if [ ! 
-z $frac ]; then", + "samtools", + "view", + "-s", {"subst":"seed_frac", "required":true, "ifnull": {"subst_constructor":{"vals":[ {"subst":"subsample_seed", "ifnull":{"subst":"s2_id_run", "required":true}}, "${frac}" ],"postproc":{"op":"concat","pad":""}}}}, + "-b", + "-", + ";", + "else >&2 printf \"No tag metrics, no subsample\"; fi;'" + ] + } +], +"edges":[] +} From c97fa11429b93c267b977acea00078b0edfc6199 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 19 Jul 2024 11:50:35 +0100 Subject: [PATCH 3/7] upgrade bambi version in container to 0.18.0 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 8fa82ed7..f8414700 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ ARG BASE_IMAGE=ubuntu:22.04 -ARG BAMBI_VERSION="0.17.1" +ARG BAMBI_VERSION="0.18.0" ARG BIOBAMBAM2_VERSION="2.0.185-release-20221211202123" ARG BWA_VERSION="0.7.18" ARG BWA_MEM2_VERSION="2.2.1" From 78ba225cefb5e95d5c5dbcd8d5e0629aab92e337 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 31 Jul 2024 18:05:12 +0100 Subject: [PATCH 4/7] allow either FASTQ or CRAM input with optional read2tags processing --- .../vtlib/alignment_wtsi_stage2_template.json | 4 +- data/vtlib/fastq_s2_pi.json | 40 ++++++++++++++----- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/data/vtlib/alignment_wtsi_stage2_template.json b/data/vtlib/alignment_wtsi_stage2_template.json index 1ec06f98..5d622004 100644 --- a/data/vtlib/alignment_wtsi_stage2_template.json +++ b/data/vtlib/alignment_wtsi_stage2_template.json @@ -102,7 +102,9 @@ "select":"s2_ppi_switch", "default":"crammerge","select_range":[1], "cases":{ "crammerge":"crammerge.json", - "aviti":"elembio_nanoseq_s2_pi.json"} + "pp_ns":"fastq_s2_pi.json", + "aviti":"elembio_nanoseq_s2_pi.json" + } }}, "subst_map":{"input_format":{"subst":"s2_input_format"}}, "comment":"inputs: NONE; outputs: _stdout_ (bam), subst_map_parameters:[input_format]", diff --git a/data/vtlib/fastq_s2_pi.json 
b/data/vtlib/fastq_s2_pi.json index a7195742..d6649d0f 100644 --- a/data/vtlib/fastq_s2_pi.json +++ b/data/vtlib/fastq_s2_pi.json @@ -20,15 +20,37 @@ "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd": [ - {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "import", - "-R", {"subst":"fastq_s2_pi_RG_ID","required":true, "comment":"readgroup"}, - "-1", {"subst":"fastq_s2_pi_fq1","required":true, "comment":"FASTQ read 1"}, - "-2", {"subst":"fastq_s2_pi_fq2","required":true, "comment":"FASTQ read 2"}, - "-i", - "-u", - "-O", "bam" - ] + "cmd": { + "select":"pp_import_method", + "required":true, + "select_range":[1], + "default":"crammerge", + "cases":{ + "crammerge": + [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "merge", + "-n", + "-O", "BAM", + "-l", "0", + {"select":"input_format", "default":"cram", "select_range":[1], "cases":{ + "cram":["--input-fmt-option", "no_ref=1"], + "bam":["--input-fmt", "bam"] + }}, + "-", + {"subst":"incrams", "required":true} + ], + "fastq": + [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "import", + "-R", {"subst":"fastq_s2_pi_RG_ID","required":true, "comment":"readgroup"}, + "-1", {"subst":"fastq_s2_pi_fq1","required":true, "comment":"FASTQ read 1"}, + "-2", {"subst":"fastq_s2_pi_fq2","required":true, "comment":"FASTQ read 2"}, + "-i", + "-u", + "-O", "bam" + ] + } + } }, { "id":"read2tags", From b0e192714b2ffd87be94ab1dc7792a8c2fba5dd6 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 2 Aug 2024 17:44:57 +0100 Subject: [PATCH 5/7] always run samtools collate+reset before bambi read2tags --- data/vtlib/fastq_s2_pi.json | 12 +++------ data/vtlib/read2tags.json | 54 +++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 9 deletions(-) create mode 100644 data/vtlib/read2tags.json diff --git a/data/vtlib/fastq_s2_pi.json b/data/vtlib/fastq_s2_pi.json index d6649d0f..72de2b58 100644 --- a/data/vtlib/fastq_s2_pi.json +++ 
b/data/vtlib/fastq_s2_pi.json @@ -60,20 +60,14 @@ "select_range":[1], "default":"off", "cases":{ - "on":"EXEC", + "on":"VTFILE", "off":"INACTIVE" } }, "use_STDIN": true, "use_STDOUT": true, - "cmd": [ - {"subst":"bambi_executable", "required":true, "ifnull":"bambi"}, "read2tags", - "--tags", "rb,mb,br,rb,mb,br", - "--qtags", "rq,mq,bq,rq,mq,bq", - "--positions", "1:1:1:3,1:2:1:3,1:1:4:7,2:2:1:3,2:1:1:3,2:2:4:7", - "--compression-level", 0, - "--output-fmt", "bam" - ] + "name":"read2tags.json", + "node_prefix":"r2t_" } ], "edges":[ diff --git a/data/vtlib/read2tags.json b/data/vtlib/read2tags.json new file mode 100644 index 00000000..58424c49 --- /dev/null +++ b/data/vtlib/read2tags.json @@ -0,0 +1,54 @@ +{ +"version":"2.0", +"description":"read2tags for NanoSeq processing, including preparatory collation and reset", +"subgraph_io":{ + "ports":{ + "inputs":{"_stdin_":"collate"}, + "outputs":{ "_stdout_":"read2tags" } + } +}, +"nodes":[ + { + "id":"collate", + "type": "EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "collate", + "--threads", {"subst":"s2_r2t_coll_threads","required":true,"ifnull":2}, + "-u", + "-O", + "-" + ] + }, + { + "id":"reset", + "type": "EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "reset", + "--threads", {"subst":"s2_r2t_rs_threads","required":true,"ifnull":4}, + "--output-fmt", "BAM,level=0" + ] + }, + { + "id":"read2tags", + "type": "EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ + {"subst":"bambi_executable", "required":true, "ifnull":"bambi"}, "read2tags", + "--tags", "rb,mb,br,rb,mb,br", + "--qtags", "rq,mq,bq,rq,mq,bq", + "--positions", "1:1:1:3,1:2:1:3,1:1:4:7,2:2:1:3,2:1:1:3,2:2:4:7", + "--compression-level", 0, + "--output-fmt", "bam" + ] + } +], +"edges":[ + { "id":"collate_to_reset", "from":"collate","to":"reset" }, + { "id":"reset_to_read2tags", 
"from":"reset", "to":"read2tags" } +] +} From ec6c748ca0f16988ff4ac001b5a429c741ca138f Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Tue, 13 Aug 2024 12:49:12 +0100 Subject: [PATCH 6/7] set defaults for samtools import flags "-i" and "-T", and make them optional --- data/vtlib/fastq_s2_pi.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data/vtlib/fastq_s2_pi.json b/data/vtlib/fastq_s2_pi.json index 72de2b58..32039b93 100644 --- a/data/vtlib/fastq_s2_pi.json +++ b/data/vtlib/fastq_s2_pi.json @@ -45,7 +45,8 @@ "-R", {"subst":"fastq_s2_pi_RG_ID","required":true, "comment":"readgroup"}, "-1", {"subst":"fastq_s2_pi_fq1","required":true, "comment":"FASTQ read 1"}, "-2", {"subst":"fastq_s2_pi_fq2","required":true, "comment":"FASTQ read 2"}, - "-i", + {"select":"parse_casava_id", "default":"on", "select_range":[1], "cases":{ "on":["-i"], "off":[] }}, + {"subst":"parse_import_tags_flag", "ifnull":["-T", {"subst":"parse_import_tags","required":true,"ifnull":"*"}]}, "-u", "-O", "bam" ] From b4c6065406f5701a5209b71fc1ba01cf606cdd8c Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Tue, 27 Aug 2024 16:10:24 +0100 Subject: [PATCH 7/7] add static parameter files for stage2 reanalysis improve naming of template names and parameter names&values for stage2 input preprocessing --- .../stage2_reanalysis/align_bwa_mem2.json | 13 +++++++ .../base_params_duplexseq_cram.json | 37 +++++++++++++++++++ .../base_params_duplexseq_fastq.json | 35 ++++++++++++++++++ .../vtlib/alignment_wtsi_stage2_template.json | 3 +- ..._pi.json => stage2_preprocess_inputs.json} | 0 5 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 data/static_params/stage2_reanalysis/align_bwa_mem2.json create mode 100644 data/static_params/stage2_reanalysis/base_params_duplexseq_cram.json create mode 100644 data/static_params/stage2_reanalysis/base_params_duplexseq_fastq.json rename data/vtlib/{fastq_s2_pi.json => stage2_preprocess_inputs.json} (100%) diff --git 
a/data/static_params/stage2_reanalysis/align_bwa_mem2.json b/data/static_params/stage2_reanalysis/align_bwa_mem2.json new file mode 100644 index 00000000..477d9712 --- /dev/null +++ b/data/static_params/stage2_reanalysis/align_bwa_mem2.json @@ -0,0 +1,13 @@ +{ + "assign": [ + { + "alignment_method": "bwa_mem", + "bwa_executable": "bwa-mem2" + } + ], + "assign_local": {}, + "ops": { + "splice": [], + "prune": [] + } +} diff --git a/data/static_params/stage2_reanalysis/base_params_duplexseq_cram.json b/data/static_params/stage2_reanalysis/base_params_duplexseq_cram.json new file mode 100644 index 00000000..bafe2753 --- /dev/null +++ b/data/static_params/stage2_reanalysis/base_params_duplexseq_cram.json @@ -0,0 +1,37 @@ +{ + "assign": [ + { + "spatial_filter_switch":"off", + "markdup_optical_distance_value": "100", + "s2_se_pe": "pe", + "samtools_executable": "samtools", + "s2_input_format": "cram", + "markdup_method": "duplexseq", + "s2_ppi_switch":"s2_ppi", + "pp_read2tags":"on", + "pp_import_method":"crammerge", + "fastq_s2_pi_fq1": "DUMMY", + "fastq_s2_pi_fq2": "DUMMY", + "fastq_s2_pi_RG_ID": "DUMMY", + "s2_filter_files": "DUMMY", + "spatial_filter_file": "DUMMY", + "phix_reference_genome_fasta":"DUMMY", + "realignment_switch":1 + } + ], + "assign_local": {}, + "ops": { + "splice": [ + "aln_bam12auxmerge:-foptgt_000_fixmate:", + "foptgt_seqchksum_file:-scs_cmp_seqchksum:outputchk" + ], + "prune": [ + "foptgt.*_bmd_multiway:calibration_pu-", + "foptgt_cram_tee:c2a-", + "foptgt.*samtools_stats_F0.*_target.*-", + "foptgt.*samtools_stats_F0.*00_bait.*-", + "aln_tee3_tee3:to_phix_aln-scs_cmp_seqchksum:outputchk", + "ssfqc_tee_ssfqc:subsample-" + ] + } +} diff --git a/data/static_params/stage2_reanalysis/base_params_duplexseq_fastq.json b/data/static_params/stage2_reanalysis/base_params_duplexseq_fastq.json new file mode 100644 index 00000000..ab9f5538 --- /dev/null +++ b/data/static_params/stage2_reanalysis/base_params_duplexseq_fastq.json @@ -0,0 +1,35 @@ +{ + 
"assign": [ + { + "spatial_filter_switch":"off", + "markdup_optical_distance_value": "100", + "s2_se_pe": "pe", + "samtools_executable": "samtools", + "s2_input_format": "cram", + "markdup_method": "duplexseq", + "s2_ppi_switch":"s2_ppi", + "pp_read2tags":"on", + "pp_import_method":"fastq", + "incrams": "DUMMY", + "s2_filter_files": "DUMMY", + "spatial_filter_file": "DUMMY", + "phix_reference_genome_fasta":"DUMMY", + "realignment_switch":1 + } + ], + "assign_local": {}, + "ops": { + "splice": [ + "aln_bam12auxmerge:-foptgt_000_fixmate:", + "foptgt_seqchksum_file:-scs_cmp_seqchksum:outputchk" + ], + "prune": [ + "foptgt.*_bmd_multiway:calibration_pu-", + "foptgt_cram_tee:c2a-", + "foptgt.*samtools_stats_F0.*_target.*-", + "foptgt.*samtools_stats_F0.*00_bait.*-", + "aln_tee3_tee3:to_phix_aln-scs_cmp_seqchksum:outputchk", + "ssfqc_tee_ssfqc:subsample-" + ] + } +} diff --git a/data/vtlib/alignment_wtsi_stage2_template.json b/data/vtlib/alignment_wtsi_stage2_template.json index 5d622004..bbb29e45 100644 --- a/data/vtlib/alignment_wtsi_stage2_template.json +++ b/data/vtlib/alignment_wtsi_stage2_template.json @@ -102,8 +102,7 @@ "select":"s2_ppi_switch", "default":"crammerge","select_range":[1], "cases":{ "crammerge":"crammerge.json", - "pp_ns":"fastq_s2_pi.json", - "aviti":"elembio_nanoseq_s2_pi.json" + "s2_ppi":"stage2_preprocess_inputs.json" } }}, "subst_map":{"input_format":{"subst":"s2_input_format"}}, diff --git a/data/vtlib/fastq_s2_pi.json b/data/vtlib/stage2_preprocess_inputs.json similarity index 100% rename from data/vtlib/fastq_s2_pi.json rename to data/vtlib/stage2_preprocess_inputs.json