diff --git a/environment.yml b/environment.yml index ee2d355ed..7571f1fe1 100644 --- a/environment.yml +++ b/environment.yml @@ -26,7 +26,6 @@ dependencies: - preseq=2.0.3 - deeptools=3.2.1 - gffread=0.11.4 - - csvtk=0.18.2 - qualimap=2.2.2c - rseqc=3.0.0 - subread=1.6.4 diff --git a/main.nf b/main.nf index c453a9042..7157007a4 100644 --- a/main.nf +++ b/main.nf @@ -1029,10 +1029,13 @@ process featureCounts { """ } + + /* * STEP 10 - Merge featurecounts */ process merge_featureCounts { + label "mid_memory" tag "${input_files[0].baseName - '.sorted'}" publishDir "${params.outdir}/featureCounts", mode: 'copy' @@ -1043,10 +1046,14 @@ process merge_featureCounts { file 'merged_gene_counts.txt' into featurecounts_merged script: - //if we only have 1 file, just use cat and pipe output to csvtk. Else join all files first, and then remove unwanted column names. - def merge = input_files instanceof Path ? 'cat' : 'csvtk join -t -f "Geneid,Start,Length,End,Chr,Strand,gene_name"' + // Process substitution (the `<()`) feeds each command's output to paste as a file + // Geneid in 1st column and gene_name in 7th + gene_ids = "<(tail -n +2 ${input_files[0]} | cut -f1,7 )" + counts = input_files.collect{filename -> + // Drop the first line, strip `.sorted.bam`, and keep the 8th column + "<(tail -n +2 ${filename} | sed 's:.sorted.bam::' | cut -f8)"}.join(" ") """ - $merge $input_files | csvtk cut -t -f "-Start,-Chr,-End,-Length,-Strand" | sed 's/Aligned.sortedByCoord.out.markDups.bam//g' > merged_gene_counts.txt + paste $gene_ids $counts > merged_gene_counts.txt """ } @@ -1088,7 +1095,7 @@ if (params.pseudo_aligner == 'salmon'){ } process salmon_merge { - label 'low_memory' + label 'mid_memory' publishDir "${params.outdir}/salmon", mode: 'copy' input: