From 37323d151d6956fe3c7c5666b27918fe19a7d273 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 3 Nov 2023 14:39:34 +0100 Subject: [PATCH 01/23] cutadapt: fix tests test 4 was failing due to https://github.com/galaxyproject/galaxy/pull/15085 here the size assertion makes no sense given that we compare for exact equality anyway. --- tools/cutadapt/cutadapt.xml | 16 ++++------------ tools/cutadapt/test-data/cutadapt_rest.json | 3 +++ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 8b591056265..17b619388a7 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -482,16 +482,8 @@ $read_mod_options.zero_cap - - - - - - - - - - + + @@ -540,8 +532,8 @@ $read_mod_options.zero_cap - - + + diff --git a/tools/cutadapt/test-data/cutadapt_rest.json b/tools/cutadapt/test-data/cutadapt_rest.json index c245a713b6a..634d3a81f46 100644 --- a/tools/cutadapt/test-data/cutadapt_rest.json +++ b/tools/cutadapt/test-data/cutadapt_rest.json @@ -46,6 +46,9 @@ "quality_trimmed": null, "quality_trimmed_read1": null, "quality_trimmed_read2": null, + "poly_a_trimmed": null, + "poly_a_trimmed_read1": null, + "poly_a_trimmed_read2": null, "output": 35, "output_read1": 35, "output_read2": null From b64b22b5419d1016b9329271b7b070a3e859b1b8 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Thu, 2 Nov 2023 17:35:21 +0100 Subject: [PATCH 02/23] cutadapt: add argument to quality_cutoff2 --- tools/cutadapt/macros.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index d950ca6727f..7ba20949488 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -351,14 +351,13 @@ - + [0-9,]+ - From f009d9daf1df3c1dcbf2b21a3cd4fcc9b30634a8 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 3 Nov 2023 12:45:10 +0100 Subject: [PATCH 03/23] cutadapt: unify adapter specification using macros --- 
tools/cutadapt/cutadapt.xml | 68 +++++++++--------- tools/cutadapt/macros.xml | 138 ++++++++++++++++++------------------ 2 files changed, 103 insertions(+), 103 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 17b619388a7..0fa427af600 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -439,9 +439,9 @@ $read_mod_options.zero_cap
- - - + + +
@@ -475,9 +475,9 @@ $read_mod_options.zero_cap
- - - + + +
@@ -580,17 +580,17 @@ $read_mod_options.zero_cap
- - - - + + + + - - - - + + + +
@@ -612,9 +612,9 @@ $read_mod_options.zero_cap
- - - + + +
@@ -684,8 +684,8 @@ $read_mod_options.zero_cap
- - + +
@@ -837,9 +837,9 @@ $read_mod_options.zero_cap
- - - + + +
@@ -873,9 +873,9 @@ $read_mod_options.zero_cap
- - - + + +
@@ -1061,9 +1061,9 @@ $read_mod_options.zero_cap
- - - + + +
@@ -1115,9 +1115,9 @@ $read_mod_options.zero_cap
- - - + + + @@ -1147,9 +1147,9 @@ $read_mod_options.zero_cap
- - - + + + diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 7ba20949488..b94d0dab50e 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -40,26 +40,26 @@ -a '${a.adapter_source.adapter}${adapter_options.internal}${a.single_noindels}' #end if #end for - #for $aa in $library.r1.anywhere_adapters - #if $aa.anywhere_adapter_source.anywhere_adapter_source_list == 'builtin': - -b '${aa.anywhere_adapter_source.anywhere_adapter.fields.name}'='${aa.anywhere_adapter_source.anywhere_adapter}${aa.single_noindels}' - #else if $aa.anywhere_adapter_source.anywhere_adapter_source_list == 'file': - -b file:'${aa.anywhere_adapter_source.anywhere_adapter_file}${aa.single_noindels}' - #else if str($aa.anywhere_adapter_source.anywhere_adapter_name) != "": - -b '${aa.anywhere_adapter_source.anywhere_adapter_name}'='${aa.anywhere_adapter_source.anywhere_adapter}${aa.single_noindels}' + #for $a in $library.r1.anywhere_adapters + #if $a.adapter_source.adapter_source_list == 'builtin': + -b '${a.adapter_source.adapter.fields.name}'='${a.adapter_source.adapter}${a.single_noindels}' + #else if $a.adapter_source.adapter_source_list == 'file': + -b file:'${a.adapter_source.adapter_file}${a.single_noindels}' + #else if str($a.adapter_source.adapter_name) != "": + -b '${a.adapter_source.adapter_name}'='${a.adapter_source.adapter}${a.single_noindels}' #else - -b '${aa.anywhere_adapter_source.anywhere_adapter}${aa.single_noindels}' + -b '${a.adapter_source.adapter}${a.single_noindels}' #end if #end for - #for $fa in $library.r1.front_adapters - #if $fa.front_adapter_source.front_adapter_source_list == 'builtin': - -g '${fa.front_adapter_source.front_adapter.fields.name}'='${adapter_options.internal}${fa.front_adapter_source.front_adapter}${fa.single_noindels}' - #else if $fa.front_adapter_source.front_adapter_source_list == 'file': - -g file:'${adapter_options.internal}${fa.front_adapter_source.front_adapter_file}${fa.single_noindels}' - #else if 
str($fa.front_adapter_source.front_adapter_name) != "": - -g '${fa.front_adapter_source.front_adapter_name}'='${adapter_options.internal}${fa.front_adapter_source.front_adapter}${fa.single_noindels}' + #for $a in $library.r1.front_adapters + #if $a.adapter_source.adapter_source_list == 'builtin': + -g '${a.adapter_source.adapter.fields.name}'='${adapter_options.internal}${a.adapter_source.adapter}${a.single_noindels}' + #else if $a.adapter_source.adapter_source_list == 'file': + -g file:'${adapter_options.internal}${a.adapter_source.adapter_file}${a.single_noindels}' + #else if str($a.adapter_source.adapter_name) != "": + -g '${a.adapter_source.adapter_name}'='${adapter_options.internal}${a.adapter_source.adapter}${a.single_noindels}' #else - -g '${adapter_options.internal}${fa.front_adapter_source.front_adapter}${fa.single_noindels}' + -g '${adapter_options.internal}${a.adapter_source.adapter}${a.single_noindels}' #end if #end for @@ -95,36 +95,36 @@ ## Read2 trimming #for $a in $library.r2.adapters2 - #if $a.adapter_source2.adapter_source_list2 == 'builtin': - -A '${a.adapter_source2.adapter2.fields.name}'='${a.adapter_source2.adapter2}${adapter_options.internal}${a.single_noindels}' - #else if $a.adapter_source2.adapter_source_list2 == 'file': - -A file:'${a.adapter_source2.adapter_file2}${adapter_options.internal}${a.single_noindels}' - #else if str($a.adapter_source2.adapter_name2) != "": - -A '${a.adapter_source2.adapter_name2}'='${a.adapter_source2.adapter2}${adapter_options.internal}${a.single_noindels}' + #if $a.adapter_source.adapter_source_list == 'builtin': + -A '${a.adapter_source.adapter.fields.name}'='${a.adapter_source.adapter}${adapter_options.internal}${a.single_noindels}' + #else if $a.adapter_source.adapter_source_list == 'file': + -A file:'${a.adapter_source.adapter_file}${adapter_options.internal}${a.single_noindels}' + #else if str($a.adapter_source.adapter_name) != "": + -A 
'${a.adapter_source.adapter_name}'='${a.adapter_source.adapter}${adapter_options.internal}${a.single_noindels}' #else - -A '${a.adapter_source2.adapter2}${adapter_options.internal}${a.single_noindels}' + -A '${a.adapter_source.adapter}${adapter_options.internal}${a.single_noindels}' #end if #end for - #for $aa in $library.r2.anywhere_adapters2 - #if $aa.anywhere_adapter_source2.anywhere_adapter_source_list2 == 'builtin': - -B '${aa.anywhere_adapter_source2.anywhere_adapter2.fields.name}'='${aa.anywhere_adapter_source2.anywhere_adapter2}${aa.single_noindels}' - #else if $aa.anywhere_adapter_source2.anywhere_adapter_source_list2 == 'file': - -B file:'${aa.anywhere_adapter_source2.anywhere_adapter_file2}${aa.single_noindels}' - #else if str($aa.anywhere_adapter_source2.anywhere_adapter_name2) != "": - -B '${aa.anywhere_adapter_source2.anywhere_adapter_name2}'='${aa.anywhere_adapter_source2.anywhere_adapter2}${aa.single_noindels}' + #for $a in $library.r2.anywhere_adapters2 + #if $a.adapter_source.adapter_source_list == 'builtin': + -B '${a.adapter_source.adapter.fields.name}'='${a.adapter_source.adapter}${a.single_noindels}' + #else if $a.adapter_source.adapter_source_list == 'file': + -B file:'${a.adapter_source.adapter_file}${a.single_noindels}' + #else if str($a.adapter_source.adapter_name) != "": + -B '${a.adapter_source.adapter_name}'='${a.adapter_source.adapter}${a.single_noindels}' #else - -B '${aa.anywhere_adapter_source2.anywhere_adapter2}' + -B '${a.adapter_source.adapter}' #end if #end for - #for $fa in $library.r2.front_adapters2 - #if $fa.front_adapter_source2.front_adapter_source_list2 == 'builtin': - -G '${fa.front_adapter_source2.front_adapter2.fields.name}'='${adapter_options.internal}${fa.front_adapter_source2.front_adapter2}${fa.single_noindels}' - #else if $fa.front_adapter_source2.front_adapter_source_list2 == 'file': - -G file:'${adapter_options.internal}${fa.front_adapter_source2.front_adapter_file2}${fa.single_noindels}' - #else if 
str($fa.front_adapter_source2.front_adapter_name2) != "": - -G '${fa.front_adapter_source2.front_adapter_name2}'='${adapter_options.internal}${fa.front_adapter_source2.front_adapter2}${fa.single_noindels}' + #for $a in $library.r2.front_adapters2 + #if $a.adapter_source.adapter_source_list == 'builtin': + -G '${a.adapter_source.adapter.fields.name}'='${adapter_options.internal}${a.adapter_source.adapter}${a.single_noindels}' + #else if $a.adapter_source.adapter_source_list == 'file': + -G file:'${adapter_options.internal}${a.adapter_source.adapter_file}${a.single_noindels}' + #else if str($a.adapter_source.adapter_name) != "": + -G '${a.adapter_source.adapter_name}'='${adapter_options.internal}${a.adapter_source.adapter}${a.single_noindels}' #else - -G '${adapter_options.internal}${fa.front_adapter_source2.front_adapter2}${fa.single_noindels}' + -G '${adapter_options.internal}${a.adapter_source.adapter}${a.single_noindels}' #end if #end for @@ -226,44 +226,44 @@ - - + + - - + + - + - + - - + + - - + + - + - + @@ -284,67 +284,67 @@
- - + + - - + + - + - + - - + + - - + + - + - + - - + + - - + + - + - + From ed4f20e0bd957b97d384d758e1929a698d375e7e Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 3 Nov 2023 16:21:36 +0100 Subject: [PATCH 04/23] cutadapt: replace adapter conditionals by macros --- tools/cutadapt/macros.xml | 172 +++++++------------------------------- 1 file changed, 32 insertions(+), 140 deletions(-) diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index b94d0dab50e..0058c654f50 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -182,97 +182,47 @@ - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + -
- - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - + - -
-
@@ -282,72 +232,14 @@
- - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - + From 7455e58cec3ddda4173a7ddd855ce27e2065a2d0 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 3 Nov 2023 16:56:50 +0100 Subject: [PATCH 05/23] cutadapt: move cut argument --- tools/cutadapt/cutadapt.xml | 5 +++-- tools/cutadapt/macros.xml | 5 ++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 0fa427af600..9f953cf1985 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -259,6 +259,7 @@ $read_mod_options.zero_cap
+ @@ -714,8 +715,8 @@ $read_mod_options.zero_cap -
+ @@ -729,8 +730,8 @@ $read_mod_options.zero_cap -
+
diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 0058c654f50..ebcf523a13f 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -63,8 +63,8 @@ #end if #end for - #if str($library.r1.cut) != '0': - -u $library.r1.cut + #if str($cut) != '0': + -u $cut #end if ## Additional Outputs @@ -221,7 +221,6 @@ -
From 212f0e8cae02ee511cdf5012e7a0042f8ac30976 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 3 Nov 2023 17:12:17 +0100 Subject: [PATCH 06/23] cutadapt: drop internal and unify cli generation for adapter options - makes no sense to apply the `internal` property (which just appends an X to the adapter sequence) to all adapters - also came in handy for unifying the cli generation for the adapter options using a single token --- tools/cutadapt/cutadapt.xml | 24 +++-------- tools/cutadapt/macros.xml | 82 +++++++++++++------------------------ 2 files changed, 34 insertions(+), 72 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 9f953cf1985..5412c21fa51 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -212,10 +212,6 @@ $read_mod_options.zero_cap - - - - @@ -1028,6 +1024,7 @@ $read_mod_options.zero_cap
+ @@ -1036,13 +1033,10 @@ $read_mod_options.zero_cap - + -
- -
@@ -1056,7 +1050,7 @@ $read_mod_options.zero_cap - + @@ -1064,17 +1058,14 @@ $read_mod_options.zero_cap - + -
- -
- +
@@ -1088,12 +1079,9 @@ $read_mod_options.zero_cap -
- -
- +
diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index ebcf523a13f..302fde77d4f 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -25,42 +25,37 @@ cutadapt
+ + + + #if $a.adapter_source.adapter_source_list == 'builtin': + $ADAPTER_ARGUMENT '${a.adapter_source.adapter.fields.name}'='${a.adapter_source.adapter}${a.single_noindels}' + #else if $a.adapter_source.adapter_source_list == 'file': + $ADAPTER_ARGUMENT file:'${a.adapter_source.adapter_file}${a.single_noindels}' + #else if str($a.adapter_source.adapter_name) != "": + $ADAPTER_ARGUMENT '${a.adapter_source.adapter_name}'='${a.adapter_source.adapter}${a.single_noindels}' + #else + $ADAPTER_ARGUMENT '${a.adapter_source.adapter}${a.single_noindels}' + #end if + + Date: Fri, 3 Nov 2023 21:25:49 +0100 Subject: [PATCH 07/23] cutadapt: move min/max length for R2 to R2 options fixes https://github.com/galaxyproject/tools-iuc/issues/5099 --- tools/cutadapt/cutadapt.xml | 46 +++++++++---------------------------- tools/cutadapt/macros.xml | 18 +++++++++------ 2 files changed, 22 insertions(+), 42 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 5412c21fa51..569bb0d9a1f 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -86,7 +86,7 @@ cutadapt -j=\${GALAXY_SLOTS:-4} #if 'json_stats' in $output_selector: - --json stats.json + --json stats.json #end if #if str( $library.type ) == "single": @@ -114,13 +114,13 @@ $adapter_options.revcomp $filter_options.discard_trimmed $filter_options.discard_untrimmed -#if str($filter_options.minimum_length) and str($filter_options.length_R2_options.length_R2_status) == 'True': - --minimum-length=$filter_options.minimum_length:$filter_options.length_R2_options.R2_minimum +#if str($filter_options.minimum_length) and str($library.type) != "single" and str($library.minimum_length2) != '': + --minimum-length=$filter_options.minimum_length:$library.minimum_length2 #else if str($filter_options.minimum_length): --minimum-length=$filter_options.minimum_length #end if -#if str($filter_options.maximum_length) and str($filter_options.length_R2_options.length_R2_status) == 'True': - 
--maximum-length=$filter_options.maximum_length:$filter_options.length_R2_options.R2_maximum +#if str($filter_options.maximum_length) and str($library.type) != "single" and str($library.maximum_length2) != '': + --maximum-length=$filter_options.maximum_length:$library.maximum_length2 #else if str($filter_options.maximum_length): --maximum-length=$filter_options.maximum_length #end if @@ -230,18 +230,6 @@ $read_mod_options.zero_cap - - - - - - - - - - - - @@ -839,14 +827,11 @@ $read_mod_options.zero_cap +
- - - -
@@ -875,14 +860,11 @@ $read_mod_options.zero_cap +
- - - -
@@ -911,20 +893,17 @@ $read_mod_options.zero_cap +
- - - -
- + @@ -948,21 +927,18 @@ $read_mod_options.zero_cap +
- - - -
- + diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 302fde77d4f..3835a91b06b 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -215,13 +215,17 @@ - - - - - - [0-9,]+ - + + + + + + + [0-9,]+ + + + + From 510a4af97ef0d272ba66d6ee0f0f42c13f23ab8f Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 3 Nov 2023 22:00:50 +0100 Subject: [PATCH 08/23] cutadapt: use checked for bools instead of value and use lower case boolean attribute values --- tools/cutadapt/cutadapt.xml | 89 ++++++++++++++++++------------------- tools/cutadapt/macros.xml | 14 +++--- 2 files changed, 50 insertions(+), 53 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 569bb0d9a1f..d13ef2093e6 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -206,44 +206,44 @@ $read_mod_options.zero_cap
- + - + - + - +
- - - - - - - + + + + + + + - - + +
- + @@ -251,12 +251,12 @@ $read_mod_options.zero_cap [0-9,]+ - + - + @@ -268,7 +268,7 @@ $read_mod_options.zero_cap - + @@ -281,7 +281,7 @@ $read_mod_options.zero_cap [A-Za-z0-9 =-_/+]+ - + @@ -293,11 +293,11 @@ $read_mod_options.zero_cap [A-Za-z0-9 {}=_]+ - +
- + @@ -407,7 +407,7 @@ $read_mod_options.zero_cap - + @@ -430,8 +430,8 @@ $read_mod_options.zero_cap - - + + @@ -497,7 +497,7 @@ $read_mod_options.zero_cap
- +
@@ -534,7 +534,7 @@ $read_mod_options.zero_cap - +
@@ -606,12 +606,9 @@ $read_mod_options.zero_cap - - - - - - + + + @@ -644,7 +641,7 @@ $read_mod_options.zero_cap - + @@ -1120,8 +1117,8 @@ $read_mod_options.zero_cap - - + + diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 3835a91b06b..63a90f9a502 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -159,12 +159,12 @@ - + - + @@ -185,7 +185,7 @@ -
+
@@ -204,7 +204,7 @@ -
+
@@ -216,7 +216,7 @@ - + @@ -224,8 +224,8 @@ [0-9,]+ - - + +
From 6f5239795463db4de02e82762707600b2ba686cb Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 3 Nov 2023 23:08:59 +0100 Subject: [PATCH 09/23] cutadapt: only allow fastqsanger if we would want to consider fastqillumina/solexa then we would need to set `--quality-base 64`. while this would be easy, we would also need to set the output type equal to the input type (which seems not worth the effort). alternatively we could call some converter on the output to make it fastqsanger. --- tools/cutadapt/macros.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 63a90f9a502..fbfcef70eb1 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -1,7 +1,7 @@ 4.4 0 - fastq.gz,fastq,fasta + fastqsanger.bz2,fastqsanger.gz,fastqsanger,fasta topic_0632 From 74b5a7f406cfd02074629b72b42dc51a7922d7f8 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 3 Nov 2023 23:17:34 +0100 Subject: [PATCH 10/23] cutadapt: bump --- tools/cutadapt/macros.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index fbfcef70eb1..60ad4755989 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -1,6 +1,6 @@ 4.4 - 0 + 1 fastqsanger.bz2,fastqsanger.gz,fastqsanger,fasta From 2a48ce1095fc3017b2b0ce4d61024a9080931f85 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Sun, 5 Nov 2023 17:18:01 +0100 Subject: [PATCH 11/23] cutadapt: fix output handling, add polyA trimming arg, bump - fasta output could not be detected due to `out[12].fq*` - polyA trimming tests required bump --- tools/cutadapt/cutadapt.xml | 80 +++++++++++++++++++++++++++++++++++-- tools/cutadapt/macros.xml | 4 +- 2 files changed, 78 insertions(+), 6 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index d13ef2093e6..4c2018db6d6 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -144,6 
+144,7 @@ $filter_options.discard_cassava --nextseq-trim=$read_mod_options.nextseq_trim #end if $read_mod_options.trim_n +$read_mod_options.poly_a #if $read_mod_options.strip_suffix != '' --strip-suffix $read_mod_options.strip_suffix #end if @@ -252,6 +253,7 @@ $read_mod_options.zero_cap + @@ -311,20 +313,20 @@ $read_mod_options.zero_cap - + library['type'] != 'paired_collection' and 'multiple_output' not in output_selector - + library['type'] == 'paired' and 'multiple_output' not in output_selector library['type'] == 'paired_collection' and 'multiple_output' not in output_selector - - + + @@ -1121,6 +1123,76 @@ $read_mod_options.zero_cap + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + +
+ + + + + + +
+ + + + + +
+ + + + + + +
+ + + + + + +
+ + + + + + + + + + + - 4.4 - 1 + 4.5 + 0 fastqsanger.bz2,fastqsanger.gz,fastqsanger,fasta From 3a88e6042755e54fd1f032aed3ae94fbac096c8e Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 6 Nov 2023 10:47:51 +0100 Subject: [PATCH 12/23] cutadapt: fix handling of fastqillumina - requires `--quality-base=64` set - also allow .bz2 types (which were already covered by the code) --- tools/cutadapt/cutadapt.xml | 35 +++++++++++++++++++++++++++++++++++ tools/cutadapt/macros.xml | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 4c2018db6d6..0316ed8c203 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -137,6 +137,10 @@ $filter_options.discard_untrimmed #end if $filter_options.discard_cassava +#if $input_1.ext.startswith("fastqillumina") or $input_1.ext.startswith("fastqsolexa") + --quality-base=64 +#end if + #if str($read_mod_options.quality_cutoff) != '0': --quality-cutoff=$read_mod_options.quality_cutoff #end if @@ -1193,6 +1197,37 @@ $read_mod_options.zero_cap + + + + +
+ + + + + + +
+ + + + + + + +
+ + + + + + + + + + + 4.5 0 - fastqsanger.bz2,fastqsanger.gz,fastqsanger,fasta + fastq.bz2,fastq.gz,fastq,fasta topic_0632 From 3f40a9a98fff8dd47613fab3c1ba940fa50a7bbe Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 6 Nov 2023 12:51:15 +0100 Subject: [PATCH 13/23] cutadapt: fix compressed fasta handling --- tools/cutadapt/cutadapt.xml | 38 ++++++--- tools/cutadapt/macros.xml | 2 +- .../test-data/cutadapt/cut/illumina64.fastq | 80 ++++++++++++++++++ .../test-data/cutadapt/cut/simple.fasta | 4 + .../test-data/cutadapt/cut/simple.fasta.gz | Bin 0 -> 70 bytes .../test-data/cutadapt/data/illumina64.fastq | 80 ++++++++++++++++++ .../test-data/cutadapt/data/simple.fasta | 7 ++ .../test-data/cutadapt/data/simple.fasta.gz | Bin 0 -> 99 bytes 8 files changed, 197 insertions(+), 14 deletions(-) create mode 100644 tools/cutadapt/test-data/cutadapt/cut/illumina64.fastq create mode 100644 tools/cutadapt/test-data/cutadapt/cut/simple.fasta create mode 100644 tools/cutadapt/test-data/cutadapt/cut/simple.fasta.gz create mode 100644 tools/cutadapt/test-data/cutadapt/data/illumina64.fastq create mode 100644 tools/cutadapt/test-data/cutadapt/data/simple.fasta create mode 100644 tools/cutadapt/test-data/cutadapt/data/simple.fasta.gz diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 0316ed8c203..bad3d349987 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -39,15 +39,18 @@ #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.element_identifier)) #end if -#if $input_1.is_of_type("fastq.gz", "fastqsanger.gz"): - #set ext = ".fq.gz" -#else if $input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): - #set ext = ".fq.bz2" -#else if $input_1.is_of_type('fasta'): - #set ext = ".fa" -#else: +#if $input_1.ext.startswith("fastq"): #set ext = ".fq" +#else + #set ext = ".fa" +#end if +#if $input_1.ext.endswith(".gz"): + #set ext=ext+".gz" +#elif $input_1.ext.endswith(".bz2") + #set ext=ext+".bz2" #end if + + #set read1 = $read1 + 
$ext #set out1 = "out1" + $ext #set rest_output = "rest_output" + $ext @@ -1197,10 +1200,19 @@ $read_mod_options.zero_cap - + + + + + + + + + - +
@@ -1210,19 +1222,19 @@ $read_mod_options.zero_cap
- +
- + - + - + diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 2da6b7ff232..1f4017b2463 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -1,7 +1,7 @@ 4.5 0 - fastq.bz2,fastq.gz,fastq,fasta + fastq.bz2,fastq.gz,fastq,fasta.bz2,fasta.gz,fasta topic_0632 diff --git a/tools/cutadapt/test-data/cutadapt/cut/illumina64.fastq b/tools/cutadapt/test-data/cutadapt/cut/illumina64.fastq new file mode 100644 index 00000000000..bbfce73cf0b --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/cut/illumina64.fastq @@ -0,0 +1,80 @@ +@14569 +AAGTTTATTCCTGGACGAAGGAAGAAAAGGCCAGATGGGAAACAAGAACAAGCCCCTGTTGAAGACGCAGGGCC ++ +cceeeeceeeee`dedbdbdb_^b`abU_cacadabd`dLMZ[XTcT^a^adaaaddcd`aL^`^_`Y\]^`Y_ +@19211 +AGA ++ +^\` +@9180 +GAGGG ++ +b`bLb +@19132 +TGTGATTATCCACTGGTATAT ++ +Z[QZZLZ[]J[SHZNaZ[_Ia +@15868 +CTGCCAAGGCTGCCCCCAAA ++ +`c`cc\`\Lb]bL`[`a]L` +@1424 +GGCCCCAGACTTGCTCCCCCAACAAGGACAATGTCCAAGGAGTGTCCCC ++ +eeeeeeeea`bbdaaadad`Oaaaaccada_aa_d`_X`_^`[`_[_W^ +@7855 +GTGGGGGCT ++ +]^\]FW]Z` +@17943 +ACATGGGACCAGAAAACACCACCAGGGGTTTGGGGCTGTCCTGAG ++ +ccc`\^`aba\b^`\FR`OOPYG[[W```[Ra_RR_\]\\P\_H_ +@11100 +CGGATAACTGAAAATGCATTTTTAACGCCATGACCGTGTCTCAAGGACCCGCTGTGGAAG ++ +b`b_b_a\bc^Tabadaddcddd``bdaa_^aJ\^_\]\\__O[___L^\_aaa^^^UJ^ +@15663 +AGGT ++ +aaKa +@4698 +CCAATTGGCACCCCTCTGCCTTCAGCCATT ++ +cccc\`ccc\caccZccccc]^`LY\bL_b +@20649 +TCTGGACTGGATCTTTAGGATGGTGGAGATGATCTGGATGTAGGACAAAAGAACCAGGCAGAAGGGTG ++ +eeeeeaddadacdddebeccdddadd\^abbT_]bccTac]]b]L^][]Ve[^ZaY_^_^`\\Y]^Y` +@17259 + ++ + +@6003 +CTTCAACTCATCTTGTTATTAATACCATCAATATCCCATGAGGCTCATAAAACGAGTCTTTCTTCTTGGAAACATGACCAAGATTGGGCAAACGT ++ +fffffffffffffffffdffecfcefeffdcfdeeebbbdbccccc\db\`^aa`^Y^^^cbcbaa`bbWY^^^__S_YYR]GWY]\]]XX\_`S +@4118 +TCAAATTGTACTGCAAAGAAGGTCCCAGCTGGTCTCTTCTGGGAGTGATCTAACTAACTTAAG ++ +dc^ddeeeeeedeee`ceceddadadddcbde_dedc_ec_a^^b\b\\]VIPZY^T^^^\L_ +@18416 +GTGGGGAAGCCGAAGAAGCAGCGGAGATCGATTGTAAGAACGACG ++ +dddacaabdbea\d^cce\da`dd_^__`a`a`b[_^__^\^^^_ +@20115 
+TGAAAAAGGAAAACATGGTAGTTTTCTTGTATGAGAGAGCCAGAGCCACCTTGGAGATTTTGTTCTCTCTGTGCG ++ +ed^eeafffaddfecdddabc^_badd`bd_ddadaa^bbcad\d\__^_\aaa_aY____aaN_\cdc\^aaYb +@16139 +TCATCCGAAGAGTTGGCAGGCCCTGTGAATTGTGAAAACAGTATACCCACCCCTTTCCC ++ +cabacacY^c\daaddaadad^\ad_a\Y`[ZQ]Y^^OYQ^X^YT\\]U\^RRX^\YJ^ +@14123 +GATTTGGGGAAAGGAAACAATAGTTGAGTTTGGGCCACGGGAAATTCAAGATGCCTGGTATGTC ++ +cccccccac^bYbbT_aa_Yb^^Ta\\^]]aaTaaaaab\b\XL`VZZV]QYYY[aa^^^^_^^ +@8766 +ACCTGTAAGGTCCGCTCCTGGTGGACACCCACGAAGTCCAGGGCCTCAGGCAGGAAGTTGTAGCGCAGAGTTTTGAGCAGCTGCTCCATC ++ +fcfffffcffeffeeefdefddeecdccacddfdYd`d^\_^`\_abbc\b[ba^Y^Z_^^H^Z_^Y_Y_OKWPZR]]Z]`Z``Z^UHZ^ diff --git a/tools/cutadapt/test-data/cutadapt/cut/simple.fasta b/tools/cutadapt/test-data/cutadapt/cut/simple.fasta new file mode 100644 index 00000000000..a86f5f1599b --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/cut/simple.fasta @@ -0,0 +1,4 @@ +>first_sequence +SEQUENCE1 +>second_sequence +SEQUENCE2 diff --git a/tools/cutadapt/test-data/cutadapt/cut/simple.fasta.gz b/tools/cutadapt/test-data/cutadapt/cut/simple.fasta.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd721a9fcd96b21853db6ddf032986bd97130d64 GIT binary patch literal 70 zcmV-M0J;AkiwFp?%1C7Z19NF@aBO8RW?^%5VF0sB%PcA`i7!qqEKSWzPUQ-A4GeYl cb9Oc4vMWwa&d*E1lrrK10Q92L(lr180FQecF8}}l literal 0 HcmV?d00001 diff --git a/tools/cutadapt/test-data/cutadapt/data/illumina64.fastq b/tools/cutadapt/test-data/cutadapt/data/illumina64.fastq new file mode 100644 index 00000000000..bc5b10246e1 --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/data/illumina64.fastq @@ -0,0 +1,80 @@ +@14569 +AAGTTTATTCCTGGACGAAGGAAGAAAAGGCCAGATGGGAAACAAGAACAAGCCCCTGTTGAAGACGCAGGGCCAACAGGGGCCAACGAAGCTGC ++ +cceeeeceeeee`dedbdbdb_^b`abU_cacadabd`dLMZ[XTcT^a^adaaaddcd`aL^`^_`Y\]^`Y_BBBBBBBBBBBBBBBBBBBBB +@19211 +AGAGGGCGTGTGATTGCTGGATGTGGGCGGGGGGCCGGGGGAGCCCCATGGGCAGGAGACCTGAGAGCCAGGCGGTGAGGCACTATGAACGCGAG ++ 
+^\`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@9180 +GAGGGGCAGCGACTAGTCACCGGACCTGTCAGGCAAGCATAAGCCGTGCGTCAGCACCACGCTGACGGTGCTCCCGCACTCGCGGGACGCGCCAC ++ +b`bLbBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@19132 +TGTGATTATCCACTGGTATATCGGCGTGCCGTCCGCACGAGGAAAAAAGGCATTATTGTTGTGGATCTGTACCATCGTTTGTCCCGTTACCCTTC ++ +Z[QZZLZ[]J[SHZNaZ[_IaBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@15868 +CTGCCAAGGCTGCCCCCAAACCTGGCCCTCCGCGCACCCCACCACGGATCCTGACGTCCTGTCCCCCGCGGCTATGACAGCCAAGTCCCGTCAGC ++ +`c`cc\`\Lb]bL`[`a]L`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@1424 +GGCCCCAGACTTGCTCCCCCAACAAGGACAATGTCCAAGGAGTGTCCCCTGGGAAGGGTGGGCCTCCCCAGGTGCGGGCGGTGGGCACTGCCCCC ++ +eeeeeeeea`bbdaaadad`Oaaaaccada_aa_d`_X`_^`[`_[_W^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@7855 +GTGGGGGCTACAATGTGGCTCCAAGTTTTTTCCCGGGAGGTAAGGCCGGGAGCCCCCGCCCTGAGGGGGCGGGAAAGAGGAAGCCCGACGCGGAC ++ +]^\]FW]Z`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@17943 +ACATGGGACCAGAAAACACCACCAGGGGTTTGGGGCTGTCCTGAGGCTCGGGTAGCAAGCAGCGGGGCTCCGTGTCCAAGCACGCCGGTGTCACC ++ +ccc`\^`aba\b^`\FR`OOPYG[[W```[Ra_RR_\]\\P\_H_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@11100 +CGGATAACTGAAAATGCATTTTTAACGCCATGACCGTGTCTCAAGGACCCGCTGTGGAAGGGGCGCCGCAGCCAGAAGCTGGCCATGTCAGCGCG ++ +b`b_b_a\bc^Tabadaddcddd``bdaa_^aJ\^_\]\\__O[___L^\_aaa^^^UJ^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@15663 +AGGTGAAGTGGCAGGAGGACCGCCGGAAGAAGCTCTTCAGAACTCAGGGGGAGGGGGAAAGCAGAAACCAGAAGTCCAGTGAGCAGGGGGCTGAG ++ +aaKaBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@4698 +CCAATTGGCACCCCTCTGCCTTCAGCCATTCCCTCTGGCTACTGCTCTCTGGTCGGGGCGCCTGGGCGACAGACTCTCTCCCCCCACCCCCCCGC ++ +cccc\`ccc\caccZccccc]^`LY\bL_bBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@20649 
+TCTGGACTGGATCTTTAGGATGGTGGAGATGATCTGGATGTAGGACAAAAGAACCAGGCAGAAGGGTGTCATCAGAAGAACACTGCTAGACACCA ++ +eeeeeaddadacdddebeccdddadd\^abbT_]bccTac]]b]L^][]Ve[^ZaY_^_^`\\Y]^Y`BBBBBBBBBBBBBBBBBBBBBBBBBBB +@17259 +GCCTTGTGTTGTTCCTGGCATCACCGCAGGGAGCCCTGGGGGGCCAGGCGGGCGCTGACCCTGGGCACTGCCGCGCCTGGAGGGGCTGAGCACCG ++ +BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@6003 +CTTCAACTCATCTTGTTATTAATACCATCAATATCCCATGAGGCTCATAAAACGAGTCTTTCTTCTTGGAAACATGACCAAGATTGGGCAAACGT ++ +fffffffffffffffffdffecfcefeffdcfdeeebbbdbccccc\db\`^aa`^Y^^^cbcbaa`bbWY^^^__S_YYR]GWY]\]]XX\_`S +@4118 +TCAAATTGTACTGCAAAGAAGGTCCCAGCTGGTCTCTTCTGGGAGTGATCTAACTAACTTAAGCTGACCCTGTGACTGGCTGAGGATAATCCCTT ++ +dc^ddeeeeeedeee`ceceddadadddcbde_dedc_ec_a^^b\b\\]VIPZY^T^^^\L_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@18416 +GTGGGGAAGCCGAAGAAGCAGCGGAGATCGATTGTAAGAACGACGTCCATGACCAGGGTTGGTGGAGACTGCTTCTCTGCATGCGGGGGAAGGCG ++ +dddacaabdbea\d^cce\da`dd_^__`a`a`b[_^__^\^^^_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@20115 +TGAAAAAGGAAAACATGGTAGTTTTCTTGTATGAGAGAGCCAGAGCCACCTTGGAGATTTTGTTCTCTCTGTGCGCACCAGTGATGACACAGGGG ++ +ed^eeafffaddfecdddabc^_badd`bd_ddadaa^bbcad\d\__^_\aaa_aY____aaN_\cdc\^aaYbBBBBBBBBBBBBBBBBBBBB +@16139 +TCATCCGAAGAGTTGGCAGGCCCTGTGAATTGTGAAAACAGTATACCCACCCCTTTCCCGGAGCAGGACGCTGAATGTCCAGAGGATGCCAGACC ++ +cabacacY^c\daaddaadad^\ad_a\Y`[ZQ]Y^^OYQ^X^YT\\]U\^RRX^\YJ^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@14123 +GATTTGGGGAAAGGAAACAATAGTTGAGTTTGGGCCACGGGAAATTCAAGATGCCTGGTATGTCAAGTCTGGCAGTTGAAGCAGCAGGGCTGGCG ++ +cccccccac^bYbbT_aa_Yb^^Ta\\^]]aaTaaaaab\b\XL`VZZV]QYYY[aa^^^^_^^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +@8766 +ACCTGTAAGGTCCGCTCCTGGTGGACACCCACGAAGTCCAGGGCCTCAGGCAGGAAGTTGTAGCGCAGAGTTTTGAGCAGCTGCTCCATCAGGGA ++ +fcfffffcffeffeeefdefddeecdccacddfdYd`d^\_^`\_abbc\b[ba^Y^Z_^^H^Z_^Y_Y_OKWPZR]]Z]`Z``Z^UHZ^BBBBB diff --git a/tools/cutadapt/test-data/cutadapt/data/simple.fasta b/tools/cutadapt/test-data/cutadapt/data/simple.fasta new file mode 100644 index 
00000000000..e5c1d4c31c2 --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/data/simple.fasta @@ -0,0 +1,7 @@ +# a comment +# another one +>first_sequence +SEQUENCE1 +>second_sequence +SEQUEN +CE2 diff --git a/tools/cutadapt/test-data/cutadapt/data/simple.fasta.gz b/tools/cutadapt/test-data/cutadapt/data/simple.fasta.gz new file mode 100644 index 0000000000000000000000000000000000000000..b4a329ffc5608db9854c662f5bfc9f15d57d4d8d GIT binary patch literal 99 zcmV-p0G$6HiwFqB(@13i19NF@aBO8RW?^%5VE|KBNK{D9&&^HEE8$XBNX*MG$w)0y z$j?jVvP;V>DlUmHPAx1=%}Y+@3U&<)b@g+0HRQ4@PEF3wOF@_7a&|T10su5^Jb+LD F008C^C<6ch literal 0 HcmV?d00001 From 8755a680ae8b124e0c2027c22f3984926d97113a Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 6 Nov 2023 13:00:15 +0100 Subject: [PATCH 14/23] cutadapt: do not use remote test data --- tools/cutadapt/cutadapt.xml | 20 +++++++++---------- .../test-data/cutadapt/cut/polya.1.fasta | 8 ++++++++ .../test-data/cutadapt/cut/polya.2.fasta | 8 ++++++++ .../cutadapt/cut/polya.legacy.1.fasta | 8 ++++++++ .../test-data/cutadapt/data/polya.1.fasta | 8 ++++++++ .../test-data/cutadapt/data/polya.2.fasta | 8 ++++++++ 6 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 tools/cutadapt/test-data/cutadapt/cut/polya.1.fasta create mode 100644 tools/cutadapt/test-data/cutadapt/cut/polya.2.fasta create mode 100644 tools/cutadapt/test-data/cutadapt/cut/polya.legacy.1.fasta create mode 100644 tools/cutadapt/test-data/cutadapt/data/polya.1.fasta create mode 100644 tools/cutadapt/test-data/cutadapt/data/polya.2.fasta diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index bad3d349987..6c727d9f569 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -1136,14 +1136,14 @@ $read_mod_options.zero_cap - - + + - - + + @@ -1153,7 +1153,7 @@ $read_mod_options.zero_cap - +
@@ -1163,7 +1163,7 @@ $read_mod_options.zero_cap
- + @@ -1173,7 +1173,7 @@ $read_mod_options.zero_cap - +
@@ -1183,7 +1183,7 @@ $read_mod_options.zero_cap
- + @@ -1193,9 +1193,9 @@ $read_mod_options.zero_cap - + - + diff --git a/tools/cutadapt/test-data/cutadapt/cut/polya.1.fasta b/tools/cutadapt/test-data/cutadapt/cut/polya.1.fasta new file mode 100644 index 00000000000..9b8fd8f4bc3 --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/cut/polya.1.fasta @@ -0,0 +1,8 @@ +>polyA +AAACTTCAGAACAG +>polyAlong +CTTAGTTCAATWTTAACCAAACTTCAGAACAG +>polyA2 +AAACTTAACAAGAACAAG +>nopoly +GAAGAGTATCTCTCTGTCCTCTTGTCCGGCGTTACAGTAATGATCG diff --git a/tools/cutadapt/test-data/cutadapt/cut/polya.2.fasta b/tools/cutadapt/test-data/cutadapt/cut/polya.2.fasta new file mode 100644 index 00000000000..9eee1995b62 --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/cut/polya.2.fasta @@ -0,0 +1,8 @@ +>polyA +CTGTTCTGAAGTTT +>polyAlong +CTGTTCTGAAGTTTGGTTAAWATTGAACTAAG +>polyA2 +CTTGTTCTTGTTAAGTTT +>nopoly +TCTGAAGTTTGGTTAAWATTGAACTAA diff --git a/tools/cutadapt/test-data/cutadapt/cut/polya.legacy.1.fasta b/tools/cutadapt/test-data/cutadapt/cut/polya.legacy.1.fasta new file mode 100644 index 00000000000..f0a5c35ea04 --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/cut/polya.legacy.1.fasta @@ -0,0 +1,8 @@ +>polyA +AAACTTCAG +>polyAlong +CTTAGTTCAATWTTAACCAAACTTCAGAACAG +>polyA2 +AAACTTAAC +>nopoly +GAAGAGTATCTCTCTGTCCTCTTGTCCGGCGTTACAGTAATGATCG diff --git a/tools/cutadapt/test-data/cutadapt/data/polya.1.fasta b/tools/cutadapt/test-data/cutadapt/data/polya.1.fasta new file mode 100644 index 00000000000..dd6d5b9b3e9 --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/data/polya.1.fasta @@ -0,0 +1,8 @@ +>polyA +AAACTTCAGAACAGAAAAAAAAAAAAAAAAAAAAA +>polyAlong +CTTAGTTCAATWTTAACCAAACTTCAGAACAGAAAAAAAAAAAAAAAAAAAAAGAAAAAAAAAAAAAAAAAAAA +>polyA2 +AAACTTAACAAGAACAAGAAAAAAAAAAAAAAAAAAAAA +>nopoly +GAAGAGTATCTCTCTGTCCTCTTGTCCGGCGTTACAGTAATGATCG diff --git a/tools/cutadapt/test-data/cutadapt/data/polya.2.fasta b/tools/cutadapt/test-data/cutadapt/data/polya.2.fasta new file mode 100644 index 00000000000..6f333bfc06a --- /dev/null +++ 
b/tools/cutadapt/test-data/cutadapt/data/polya.2.fasta @@ -0,0 +1,8 @@ +>polyA +TTTTTTTTTTTTTTTTTTTTTCTGTTCTGAAGTTT +>polyAlong +TTTTTTTTTTTTTTTTTTTTCTTTTTTTTTTTTTTTTTTTTTCTGTTCTGAAGTTTGGTTAAWATTGAACTAAG +>polyA2 +TTTTTTTTTTTTTTTTTTTTTCTTGTTCTTGTTAAGTTT +>nopoly +TCTGAAGTTTGGTTAAWATTGAACTAA From 33e505e8720f9a5a053a6e63eedf2350863affe0 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 6 Nov 2023 13:11:41 +0100 Subject: [PATCH 15/23] cutadapt: better regex for quality cutoff param --- tools/cutadapt/cutadapt.xml | 6 +++++- tools/cutadapt/macros.xml | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 6c727d9f569..df6b4a6bf99 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -256,7 +256,7 @@ $read_mod_options.zero_cap - [0-9,]+ + [0-9]+(,[0-9])? @@ -1129,6 +1129,10 @@ $read_mod_options.zero_cap + + + + diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 1f4017b2463..548df9064c4 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -221,7 +221,7 @@ - [0-9,]+ + [0-9]+(,[0-9])? From 51f9c181ddc8e0c3d314fee021b5c5ade0fbae36 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 6 Nov 2023 13:55:20 +0100 Subject: [PATCH 16/23] cutadapt: fix --max-expected-errors and add --max-average-error-rate should be float --- tools/cutadapt/cutadapt.xml | 26 ++++++++++++++++++- .../test-data/cutadapt/cut/maxee.fastq | 8 ++++++ .../test-data/cutadapt/data/maxee.fastq | 16 ++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 tools/cutadapt/test-data/cutadapt/cut/maxee.fastq create mode 100644 tools/cutadapt/test-data/cutadapt/data/maxee.fastq diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index df6b4a6bf99..1d515040752 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -245,7 +245,8 @@ $read_mod_options.zero_cap - + +
@@ -1244,6 +1245,29 @@ $read_mod_options.zero_cap + + + + + + + + + + + + + + + + + + + + + + + Date: Mon, 6 Nov 2023 15:57:29 +0100 Subject: [PATCH 17/23] cutadapt: make no_match_adapter_wildcards independent --- tools/cutadapt/cutadapt.xml | 54 +++++++++++++++++-- .../test-data/cutadapt/cut/wildcard.fa | 4 ++ .../test-data/cutadapt/cut/wildcardN.fa | 6 +++ .../test-data/cutadapt/data/wildcard.fa | 4 ++ .../test-data/cutadapt/data/wildcardN.fa | 6 +++ 5 files changed, 69 insertions(+), 5 deletions(-) create mode 100644 tools/cutadapt/test-data/cutadapt/cut/wildcard.fa create mode 100644 tools/cutadapt/test-data/cutadapt/cut/wildcardN.fa create mode 100644 tools/cutadapt/test-data/cutadapt/data/wildcard.fa create mode 100644 tools/cutadapt/test-data/cutadapt/data/wildcardN.fa diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 1d515040752..0b3ff658456 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -111,6 +111,7 @@ cutadapt --overlap=$adapter_options.overlap $adapter_options.no_indels $adapter_options.match_read_wildcards +$adapter_options.no_match_adapter_wildcards --action=$adapter_options.action $adapter_options.revcomp @@ -224,11 +225,8 @@ $read_mod_options.zero_cap - - - - - + +
@@ -1268,6 +1266,52 @@ $read_mod_options.zero_cap + + + + + +
+ + + + + + +
+
+ +
+ + + + + +
+ + + + + +
+ + + + + + +
+
+ + +
+ + + + + + +
1 +TGCATGCA +>2 +TGCATGCA diff --git a/tools/cutadapt/test-data/cutadapt/cut/wildcardN.fa b/tools/cutadapt/test-data/cutadapt/cut/wildcardN.fa new file mode 100644 index 00000000000..ef44dbc368f --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/cut/wildcardN.fa @@ -0,0 +1,6 @@ +>perfect +TTT +>withN +TTT +>1mism +TTTGGGGCGG diff --git a/tools/cutadapt/test-data/cutadapt/data/wildcard.fa b/tools/cutadapt/test-data/cutadapt/data/wildcard.fa new file mode 100644 index 00000000000..f482927106e --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/data/wildcard.fa @@ -0,0 +1,4 @@ +>1 +ANGTACGTTGCATGCA +>2 +ACGTANGTTGCATGCA diff --git a/tools/cutadapt/test-data/cutadapt/data/wildcardN.fa b/tools/cutadapt/test-data/cutadapt/data/wildcardN.fa new file mode 100644 index 00000000000..5c152660dca --- /dev/null +++ b/tools/cutadapt/test-data/cutadapt/data/wildcardN.fa @@ -0,0 +1,6 @@ +>perfect +TTTGGGGGGG +>withN +TTTGGNGGGG +>1mism +TTTGGGGCGG From 40284324516d85743ced8fdc77c6191fc73f2396 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 11 Dec 2023 12:39:03 +0100 Subject: [PATCH 18/23] temporarily disable paired fasta input https://github.com/marcelm/cutadapt/issues/746 --- tools/cutadapt/cutadapt.xml | 16 +++++++++------- tools/cutadapt/macros.xml | 3 +++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 0b3ff658456..c464cfde3e6 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -200,13 +200,13 @@ $read_mod_options.zero_cap - - + + - + @@ -1134,8 +1134,10 @@ $read_mod_options.zero_cap - - + + diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 548df9064c4..7dd581a38c3 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -2,6 +2,9 @@ 4.5 0 fastq.bz2,fastq.gz,fastq,fasta.bz2,fasta.gz,fasta + + fastq.bz2,fastq.gz,fastq topic_0632 From 80df573de5f84a54217bd12ae27be8d4b8c1de8c Mon Sep 17 00:00:00 2001 From: Matthias Bernt 
Date: Mon, 11 Dec 2023 16:19:04 +0100 Subject: [PATCH 19/23] fix tests --- tools/cutadapt/cutadapt.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index c464cfde3e6..fbb342cbf60 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -1282,9 +1282,9 @@ $read_mod_options.zero_cap
- +
- + @@ -1296,7 +1296,7 @@ $read_mod_options.zero_cap
- + @@ -1304,13 +1304,13 @@ $read_mod_options.zero_cap
- +
- + - + From 23dd2225535dc443ed1d6127e1a048d484264ef1 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 11 Dec 2023 16:19:32 +0100 Subject: [PATCH 20/23] Revert "temporarily disable paired fasta input" This reverts commit 40284324516d85743ced8fdc77c6191fc73f2396. --- tools/cutadapt/cutadapt.xml | 16 +++++++--------- tools/cutadapt/macros.xml | 3 --- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index fbb342cbf60..f15a36df2ef 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -200,13 +200,13 @@ $read_mod_options.zero_cap - - + + - + @@ -1134,10 +1134,8 @@ $read_mod_options.zero_cap
- - + @@ -1151,9 +1149,9 @@ $read_mod_options.zero_cap - + - --> + diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 7dd581a38c3..548df9064c4 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -2,9 +2,6 @@ 4.5 0 fastq.bz2,fastq.gz,fastq,fasta.bz2,fasta.gz,fasta - - fastq.bz2,fastq.gz,fastq topic_0632 From 099aca09dfc10cae6027573d0775eacc55fda516 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Tue, 12 Dec 2023 11:21:45 +0100 Subject: [PATCH 21/23] bump to 4.6 --- tools/cutadapt/macros.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 548df9064c4..e1b6a59efaf 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -1,5 +1,5 @@ - 4.5 + 4.6 0 fastq.bz2,fastq.gz,fastq,fasta.bz2,fasta.gz,fasta From 4c55ba97e1b01a565730fb57349465a2b196d65e Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 15 Dec 2023 16:16:13 +0100 Subject: [PATCH 22/23] fix test data --- tools/cutadapt/test-data/cutadapt_rest.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/cutadapt/test-data/cutadapt_rest.json b/tools/cutadapt/test-data/cutadapt_rest.json index 634d3a81f46..63a84f58e11 100644 --- a/tools/cutadapt/test-data/cutadapt_rest.json +++ b/tools/cutadapt/test-data/cutadapt_rest.json @@ -84,5 +84,7 @@ } } ], - "adapters_read2": null + "adapters_read2": null, + "poly_a_trimmed_read1": null, + "poly_a_trimmed_read2": null } From 68d7365134604c45e161e96cc9e5dfb8ea83f5d0 Mon Sep 17 00:00:00 2001 From: M Bernt Date: Sun, 17 Dec 2023 18:30:25 +0100 Subject: [PATCH 23/23] use is_of_type instead of ext str comparison --- tools/cutadapt/cutadapt.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index f15a36df2ef..bc5fe47f3bd 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -39,7 +39,7 @@ #set read1 = 
re.sub('[^\w\-\s]', '_', str($library.input_1.element_identifier)) #end if -#if $input_1.ext.startswith("fastq"): +#if $input_1.is_of_type("fastq"): #set ext = ".fq" #else #set ext = ".fa"