From efe65e98b18bd10ab424a395dfc5e6a0445df2b3 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:16:25 +0100
Subject: [PATCH 01/27] edited wrapper for enrichment to use the r script;
 renamed wrapper accordingly

---
 ...EN-enrichment-GO.xml => Rscript-enrichment-GO.xml} | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)
 rename tools/MultiPEN/{MultiPEN-enrichment-GO.xml => Rscript-enrichment-GO.xml} (79%)
 mode change 100644 => 100755
diff --git a/tools/MultiPEN/MultiPEN-enrichment-GO.xml b/tools/MultiPEN/Rscript-enrichment-GO.xml
old mode 100644
new mode 100755
similarity index 79%
rename from tools/MultiPEN/MultiPEN-enrichment-GO.xml
rename to tools/MultiPEN/Rscript-enrichment-GO.xml
index 4df51ae6..d071a7d5
--- a/tools/MultiPEN/MultiPEN-enrichment-GO.xml
+++ b/tools/MultiPEN/Rscript-enrichment-GO.xml
@@ -1,13 +1,18 @@
-<tool id="MultiPEN-enrichment-GO" name="Enrichment with GO" version="0.0.3">
+<tool id="enrichGO" name="enrichGO" version="0.0.3">
     <description> (enrichment with Gene Ontology)</description>
     <requirements>
-        <requirement type="package" version="0.0.3">MultiPEN</requirement>
+        <requirement type="package" version="3.3.1">r</requirement>
+        <requirement type="package" version="3.0.5">bioconductor-clusterProfiler</requirement>
+        <requirement type="package" version="1.10">r-BBmisc</requirement>
+        <requirement type="package" version="3.3.0">bioconductor-GO.db</requirement>
+        <requirement type="package" version="3.3.0">bioconductor-org.Hs.eg.db</requirement>
+        <requirement type="package" version="0.4.1">r-pacman</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" level="fatal" />
     </stdio>
     <command><![CDATA[
-run_MultiPEN_slurm.sh EnrichmentGO ./ '$rankings'
+        Rscript '$__tool_directory__'/enrichmentGO.R '$rankings'
     ]]></command>
     <inputs>
         <param name="rankings" type="data" format="tabular" label="Rankings for all features" help="Ranking of features (genes and/or metabolites). The rankings must be specified in a tab delimited text file with (at least) three columns: 'name' (use symbol names for genes or chEBI IDs for metabolites), 'value' (used to rank the features) and 'ranking' (ranking 1 represent the most important feature)." />

From 3e7053d24a220f818e5bfc8fc64123d792218c3d Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:17:07 +0100
Subject: [PATCH 02/27] added r script to perform overrepresentation and gse
 analysis (using clusterProfiler package)

---
 tools/MultiPEN/enrichmentGO.R | 177 ++++++++++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100755 tools/MultiPEN/enrichmentGO.R

diff --git a/tools/MultiPEN/enrichmentGO.R b/tools/MultiPEN/enrichmentGO.R
new file mode 100755
index 00000000..3b16b2d1
--- /dev/null
+++ b/tools/MultiPEN/enrichmentGO.R
@@ -0,0 +1,177 @@
+# Script to run over-representation and gene set enrichment (GSE) analysis
+# with Gene Ontology for homo sapiens - using clusterProfiler package [Yu et al, 2012] 
+# [Yu et al., 2012] Yu G, Wang L, Han Y and He Q (2012), clusterProfiler: an R package 
+# for comparing biological themes among gene clusters.” OMICS: A Journal of Integrative Biology, 16(5), 
+# pp. 284-287. doi: 10.1089/omi.2011.0118. 
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Inputs: 
+#   File name            Tabular (txt) file with columns: (gene) name, value and ranking
+#   Output directory     
+# Outputs:
+#   enrichment-GO.txt      list of over-represented categories with GO
+#   enrichment-GO_BP.pdf   figure with the first over-represented biological process terms
+#   enrichment-GO_MF.pdf   figure with the first over-represented molecular function terms
+#   enrichment-GO_CC.pdf   figure with the first over-represented cellular component terms
+#   gse-GO.txt             results for gse analysis
+#
+#
+# It requires R packages:
+# clusterProfiler, https://bioconductor.org/packages/release/bioc/html/clusterProfiler.html
+# BBmisc
+# GO.db
+# org.Hs.eg.db
+#
+# To run from a terminal use following command:
+# Rscript enrichmentGO.R '/path-to-file/file-name.txt' 'output-folder/'
+
+
+# Input arguments: args[1] = input file, args[2] = output directory (optional)
+args = commandArgs(trailingOnly=TRUE)
+# NOTE(review): with more than two arguments no branch below assigns outputDir.
+# User must provide at least an input file, and optionally the output directory 
+if (length(args)==0) {
+  stop("At least one argument must be supplied (input file).n", call.=FALSE)
+} else if (length(args)==1) {
+  # if no output file is provided, use the default folder
+  outputDir = "./"
+} else if (length(args)==2) {
+  outputDir <- args[2]
+}
+# NOTE(review): the stop() message above ends in a literal ".n"; "\n" was probably intended.
+dataFile <- args[1]
+cat(sprintf("Loading data from file: %s\n", dataFile))
+
+# Create the output directory if it does not exist yet
+if(!dir.exists(outputDir)){
+  cat(sprintf("Creating output directory: %s\n", outputDir))
+  dir.create(outputDir)
+}
+
+# Load the tab-delimited rankings table (the first row holds the column names)
+data <- read.table(dataFile, header=TRUE, sep = "\t", stringsAsFactors=FALSE)
+cat(sprintf("Number of genes: %i\n",nrow(data)))
+
+# Packages: clusterProfiler (bitr and the enrichment functions), BBmisc (sortByCol)
+library(clusterProfiler)
+library(BBmisc)
+library(GO.db)
+# Map gene symbols to Entrez IDs, then build geneList: values ordered by ranking, named by Entrez ID
+D <- sortByCol(data, 'ranking')
+D <- D[,c(1,2,3)]  # Only interested in the first three columns [name, value, ranking]
+entrez<-bitr(D$name, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db", drop = FALSE)
+ranked<-merge(D,entrez,by.x='name',by.y='SYMBOL')
+ranked <- sortByCol(ranked, 'ranking')
+geneList <- ranked$value
+names(geneList) <- ranked$ENTREZID
+
+
+#### Over-representation Analysis ####
+cat(sprintf("Performing over-representation analysis (enrichGO) ...  "))
+cat(sprintf("Results saved to folder: %s\n", outputDir))
+# NOTE(review): naming column 10 below assumes the enrichGO table has 9 columns - confirm.
+#Enrichment for subontology BP (Biological Process)
+subclassOnt <- "BP"
+enrichment_BP <- enrichGO(ranked$ENTREZID, OrgDb="org.Hs.eg.db", ont=subclassOnt, readable=TRUE)
+enrichmentSummary_BP <- as.data.frame(enrichment_BP)
+head(enrichmentSummary_BP)
+if(nrow(enrichmentSummary_BP)>0){
+  aux <- cbind(enrichmentSummary_BP, "BP")
+  colnames(aux)[10]<- 'subontology'
+  enrichmentSummary_BP <- aux        
+}  
+#results starts as the BP table (possibly empty); MF and CC rows are appended below
+results <- enrichmentSummary_BP
+
+#Enrichment for subontology MF (Molecular Function)
+subclassOnt <- "MF"
+enrichment_MF <- enrichGO(ranked$ENTREZID, OrgDb="org.Hs.eg.db", ont=subclassOnt, readable=TRUE)
+enrichmentSummary_MF <- as.data.frame(enrichment_MF)
+head(enrichmentSummary_MF)
+if(nrow(enrichmentSummary_MF)>0){
+  aux <- cbind(enrichmentSummary_MF, "MF")
+  colnames(aux)[10]<- 'subontology'
+  enrichmentSummary_MF <- aux       
+  #add to results: enrichment for MF category
+  results <- rbind(results, enrichmentSummary_MF)
+}  
+
+
+#Enrichment for subontology CC (Cellular Component)
+subclassOnt <- "CC"
+enrichment_CC <- enrichGO(ranked$ENTREZID, OrgDb="org.Hs.eg.db", ont=subclassOnt, readable=TRUE)
+enrichmentSummary_CC <- as.data.frame(enrichment_CC)
+head(enrichmentSummary_CC)
+if(nrow(enrichmentSummary_CC)>0){
+  aux <- cbind(enrichmentSummary_CC, "CC")
+  colnames(aux)[10]<- 'subontology'
+  enrichmentSummary_CC <- aux
+  #add to results: enrichment for CC category
+  results <- rbind(results, enrichmentSummary_CC)
+}
+
+# modify column names for consistency with MultiPEN and for valid MATLAB identifiers
+# change (by position, columns 5-7): pvalue to pValue, p.adjust to pAdjust, qvalue to qValue
+aux <- colnames(results)
+aux[c(5,6,7)] <- c("pValue", "pAdjust", "qValue")
+colnames(results) <- aux
+
+#write the combined table (rows of all three subontologies) to file:
+fileName <- paste(outputDir, "enrichment-GO.txt", sep = "")
+cat(sprintf("writing results to file: %s\n", fileName))
+write.table(results, fileName, sep = '\t', row.names = FALSE)
+
+# One bar plot per subontology, each saved to its own PDF
+fileName <- paste(outputDir, 'enrichment-GO_BP.pdf', sep = "")
+pdf(fileName)
+barplot(enrichment_BP, showCategory=20)
+dev.off()
+
+fileName <- paste(outputDir, 'enrichment-GO_MF.pdf', sep = "")
+pdf(fileName)
+barplot(enrichment_MF, drop=TRUE, showCategory=20)
+dev.off()
+
+fileName <- paste(outputDir, 'enrichment-GO_CC.pdf', sep = "")
+pdf(fileName)
+barplot(enrichment_CC, showCategory=20)
+dev.off()
+
+
+
+#### Gene Set Enrichment Analysis ####
+# GSE for all ontologies: BP, MF and CC
+kk <- gseGO(geneList, ont = "ALL", OrgDb="org.Hs.eg.db", keytype = "ENTREZID")
+results <- as.data.frame(kk)
+results <- sortByCol(results, 'setSize', asc = F)
+head(results)
+# (results are now ordered by decreasing setSize)
+# NOTE(review): gseGO presumably needs geneList in decreasing order of value; it is
+# ordered by 'ranking' here - confirm that the two orders agree.
+# modify column names for consistency for valid MATLAB identifiers
+# change (by position, columns 7-9): pvalue to pValue, p.adjust to pAdjust, qvalues to qValue
+aux <- colnames(results)
+aux[c(7,8,9)] <- c("pValue", "pAdjust", "qValue")
+colnames(results) <- aux
+
+#write results (table) to file: 
+fileName <- paste(outputDir, "gse-GO.txt", sep = "")
+cat(sprintf("writing results to file: %s\n", fileName))
+write.table(results, fileName, sep = '\t', row.names = FALSE)
+
+# save plot to file (currently not supported, kept for reference):
+# fileName <- paste(outputDir, 'gse-GO.pdf', sep = "")
+# pdf(fileName)
+# barplot(kk)

From 4aa5da376fa86af714db3e2c54566336be2cfcf6 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:17:59 +0100
Subject: [PATCH 03/27] modified name of second column from weight to value

---
 .../test-data/MultiPEN-Rankings_lambda0.0001-onlyGenes.txt      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/MultiPEN/test-data/MultiPEN-Rankings_lambda0.0001-onlyGenes.txt b/tools/MultiPEN/test-data/MultiPEN-Rankings_lambda0.0001-onlyGenes.txt
index 35c14bbb..1a361b65 100644
--- a/tools/MultiPEN/test-data/MultiPEN-Rankings_lambda0.0001-onlyGenes.txt
+++ b/tools/MultiPEN/test-data/MultiPEN-Rankings_lambda0.0001-onlyGenes.txt
@@ -1,4 +1,4 @@
-name	weight	ranking	foldChange	higherIn	case1	case2	case3	control1	control2	control3
+name	value	ranking	foldChange	higherIn	case1	case2	case3	control1	control2	control3
 SF3B1	0.99906704216243	1	-0.236470472	control	0.283198634	0.27896994	0.405474972	0.443509247	0.650759237	0.173061082
 PDIA2	0.998078594941955	2	-0.247818562	control	0.123381382	0.291498034	0.57227493	0.264566354	0.514119992	0.533702257
 TGFBR2	0.994602559989464	3	0.221089433	case	0.986051067	0.724247034	0.477134352	0.888992345	0.867667063	0.03471835

From 0f69a88ee25e98b01d7a313222af640445c5f56e Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:30:53 +0100
Subject: [PATCH 04/27] minor change

---
 tools/MultiPEN/enrichmentGO.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/MultiPEN/enrichmentGO.R b/tools/MultiPEN/enrichmentGO.R
index 3b16b2d1..69a4a55e 100755
--- a/tools/MultiPEN/enrichmentGO.R
+++ b/tools/MultiPEN/enrichmentGO.R
@@ -45,7 +45,7 @@ args = commandArgs(trailingOnly=TRUE)
 if (length(args)==0) {
   stop("At least one argument must be supplied (input file).n", call.=FALSE)
 } else if (length(args)==1) {
-  # if no output file is provided, use the default folder
+  # if no output file is provided, use current directory
   outputDir = "./"
 } else if (length(args)==2) {
   outputDir <- args[2]

From 0f48cddabe6ba9cddfd70ca3e45fb6dbf79e39e4 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:33:07 +0100
Subject: [PATCH 05/27] renamed KEGG wrapper and made it call the r script
 enrichmentKEGG.R (instead of MultiPEN)

---
 ...MultiPEN-enrichment-KEGG.xml => Rscript-enrichment-KEGG.xml} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename tools/MultiPEN/{MultiPEN-enrichment-KEGG.xml => Rscript-enrichment-KEGG.xml} (96%)

diff --git a/tools/MultiPEN/MultiPEN-enrichment-KEGG.xml b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
similarity index 96%
rename from tools/MultiPEN/MultiPEN-enrichment-KEGG.xml
rename to tools/MultiPEN/Rscript-enrichment-KEGG.xml
index e06b58a8..78e2517a 100644
--- a/tools/MultiPEN/MultiPEN-enrichment-KEGG.xml
+++ b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
@@ -7,7 +7,7 @@
         <exit_code range="1:" level="fatal" />
     </stdio>
     <command><![CDATA[
-run_MultiPEN_slurm.sh EnrichmentKEGG ./ '$rankings'
+        Rscript '$__tool_directory__'/enrichmentKEGG.R '$rankings'
     ]]></command>
     <inputs>
         <param name="rankings" type="data" format="tabular" label="Rankings for all features" help="Ranking of features (genes and/or metabolites). The rankings must be specified in a tab delimited text file with (at least) three columns: 'name' (use symbol names for genes or chEBI IDs for metabolites), 'value' (used to rank the features) and 'ranking' (ranking 1 represent the most important feature)" />

From 7cfdee30eeef8120f2941e254d0237aeddc15403 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:34:24 +0100
Subject: [PATCH 06/27] added r script to perform overrepresentation and gse
 analysis with KEGG (using clusterProfiler package)

---
 tools/MultiPEN/enrichmentKEGG.R | 121 ++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 tools/MultiPEN/enrichmentKEGG.R

diff --git a/tools/MultiPEN/enrichmentKEGG.R b/tools/MultiPEN/enrichmentKEGG.R
new file mode 100644
index 00000000..3e74aeae
--- /dev/null
+++ b/tools/MultiPEN/enrichmentKEGG.R
@@ -0,0 +1,121 @@
+# Script to run over-representation and gene set enrichment (GSE) analysis
+# with KEGG for homo sapiens - using clusterProfiler package [Yu et al, 2012] 
+# [Yu et al., 2012] Yu G, Wang L, Han Y and He Q (2012), clusterProfiler: an R package 
+# for comparing biological themes among gene clusters.” OMICS: A Journal of Integrative Biology, 16(5), 
+# pp. 284-287. doi: 10.1089/omi.2011.0118. 
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Inputs: 
+#   File name            Tabular (txt) file with columns: (gene) name, value and ranking
+#   Output directory     
+# Outputs:
+#   enrichment-KEGG.txt   list of over-represented categories with KEGG
+#   enrichment-KEGG.pdf   figure with the first over-represented categories
+#   gse-KEGG.txt          results for gse analysis
+#
+# It requires R packages:
+# clusterProfiler, https://bioconductor.org/packages/release/bioc/html/clusterProfiler.html
+# BBmisc
+# org.Hs.eg.db
+#
+# To run script from a terminal use the command:
+# Rscript enrichmentKEGG.R 'path-to-directory/file-name.txt' 'path-to-output-folder/'
+
+# Input arguments: args[1] = rankings file, args[2] = output directory (optional)
+args <- commandArgs(trailingOnly=TRUE)
+
+# An input file is required; the output directory defaults to "./". Extra arguments are ignored.
+if (length(args)==0) {
+  stop("At least one argument must be supplied (input file).", call.=FALSE)
+} else if (length(args)==1) {
+  outputDir <- "./"
+} else {
+  outputDir <- args[2]
+}
+# File names are appended with paste(..., sep = ""), so the directory must end with a separator.
+if (!grepl("[/\\\\]$", outputDir)) outputDir <- paste0(outputDir, "/")
+
+dataFile <- args[1]
+cat(sprintf("Loading data from file: %s\n", dataFile))
+
+if(!dir.exists(outputDir)){
+  cat(sprintf("Creating output directory: %s\n", outputDir))
+  dir.create(outputDir, recursive=TRUE)
+}
+
+# Load the tab-delimited rankings table (the first row holds the column names)
+data <- read.table(dataFile, header=TRUE, sep = "\t", stringsAsFactors=FALSE)
+cat(sprintf("Number of genes: %i\n",nrow(data)))
+
+library(clusterProfiler)
+library(BBmisc)
+# Order by ranking, drop features whose value is exactly zero, map gene symbols to Entrez IDs
+D <- sortByCol(data, 'ranking')
+D <- D[D[,2]!=0,]
+D <- D[,c(1,2,3)]  # Only interested in the first three columns [name, value, ranking]
+entrez<-bitr(D$name, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db", drop = FALSE)
+ranked<-merge(D,entrez,by.x='name',by.y='SYMBOL')
+ranked <- sortByCol(ranked, 'ranking')
+geneList <- ranked$value
+names(geneList) <- ranked$ENTREZID
+
+cat(sprintf("Performing over-representation analysis (with KEGG) ...  "))
+cat(sprintf("Results saved to folder: %s\n", outputDir))
+
+
+#### Enrichment with KEGG ####
+enrichment_kegg <- enrichKEGG(ranked$ENTREZID, organism = 'hsa', keyType = "kegg")
+results <- summary(enrichment_kegg)
+head(results)
+
+# modify column names for consistency with MultiPEN and for valid MATLAB identifiers
+# change: pvalue to pValue, p.adjust to pAdjust, qvalue to qValue
+# (the three columns are addressed by position: 5, 6 and 7)
+aux <- colnames(results)
+aux[c(5,6,7)] <- c("pValue", "pAdjust", "qValue")
+colnames(results) <- aux
+
+#write results (table) to file: 
+fileName <- paste(outputDir, "enrichment-KEGG.txt", sep = "")
+cat(sprintf("writing results to file: %s\n", fileName))
+write.table(results, fileName, sep = '\t', row.names = FALSE)
+
+# save plot to file, then close the device so the PDF is complete before the GSE step runs
+fileName <- paste(outputDir, 'enrichment-KEGG.pdf', sep = "")
+pdf(fileName)
+barplot(enrichment_kegg, showCategory=20)
+dev.off()
+
+#### Gene Set Enrichment with KEGG ####
+kk <- gseKEGG(geneList, organism = 'hsa', keyType = "kegg")
+results <- summary(kk)
+results
+# NOTE(review): gseKEGG presumably needs geneList in decreasing order of value; it is
+# ordered by 'ranking' when it is built - confirm that the two orders agree.
+# modify column names for consistency for valid MATLAB identifiers
+# change (by position, columns 6-8): pvalue to pValue, p.adjust to pAdjust, qvalues to qValue
+aux <- colnames(results)
+aux[c(6,7,8)] <- c("pValue", "pAdjust", "qValue")
+colnames(results) <- aux
+
+#write results (table) to file: 
+fileName <- paste(outputDir, "gse-KEGG.txt", sep = "")
+cat(sprintf("writing results to file: %s\n", fileName))
+write.table(results, fileName, sep = '\t', row.names = FALSE)
+
+# save plot to file (currently not supported, kept for reference):
+# fileName <- paste(outputDir, 'gse-KEGG.pdf', sep = "")
+# pdf(fileName)
+# barplot(kk)
\ No newline at end of file

From de46abb01fde00bd73863b9cd1762f3aebf3d852 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:44:03 +0100
Subject: [PATCH 07/27] edited wrapper to remove pacman package from
 requirements

---
 tools/MultiPEN/Rscript-enrichment-GO.xml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/MultiPEN/Rscript-enrichment-GO.xml b/tools/MultiPEN/Rscript-enrichment-GO.xml
index d071a7d5..b05e203c 100755
--- a/tools/MultiPEN/Rscript-enrichment-GO.xml
+++ b/tools/MultiPEN/Rscript-enrichment-GO.xml
@@ -6,7 +6,6 @@
         <requirement type="package" version="1.10">r-BBmisc</requirement>
         <requirement type="package" version="3.3.0">bioconductor-GO.db</requirement>
         <requirement type="package" version="3.3.0">bioconductor-org.Hs.eg.db</requirement>
-        <requirement type="package" version="0.4.1">r-pacman</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" level="fatal" />
@@ -15,7 +14,7 @@
         Rscript '$__tool_directory__'/enrichmentGO.R '$rankings'
     ]]></command>
     <inputs>
-        <param name="rankings" type="data" format="tabular" label="Rankings for all features" help="Ranking of features (genes and/or metabolites). The rankings must be specified in a tab delimited text file with (at least) three columns: 'name' (use symbol names for genes or chEBI IDs for metabolites), 'value' (used to rank the features) and 'ranking' (ranking 1 represent the most important feature)." />
+        <param name="rankings" type="data" format="tabular" label="Rankings for all features" help="Ranking of genes which must be specified in a tab delimited text file with (at least) three columns: 'name' (use symbol gene names, 'value' (used to rank the features) and 'ranking' (ranking 1 represent the most important feature)." />
     </inputs>
     <outputs>
         <data name="enrichment-GO" format="txt" label="${tool.name}: over-representation analysis with Gene Ontology" from_work_dir="enrichment-GO.txt" />

From 02497048e94352a32bcdc0e52bc827333985b032 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:44:21 +0100
Subject: [PATCH 08/27] edited KEGG wrapper to require r, clusterProfiler,
 BBmisc and org.Hs.eg.db instead of MultiPEN

---
 tools/MultiPEN/Rscript-enrichment-KEGG.xml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/MultiPEN/Rscript-enrichment-KEGG.xml b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
index 78e2517a..b092dce7 100644
--- a/tools/MultiPEN/Rscript-enrichment-KEGG.xml
+++ b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
@@ -1,7 +1,10 @@
 <tool id="MultiPEN-enrichment-KEGG" name="Enrichment with KEGG" version="0.0.3">
     <description> (enrichment with KEGG)</description>
     <requirements>
-        <requirement type="package" version="0.0.3">MultiPEN</requirement>
+        <requirement type="package" version="3.3.1">r</requirement>
+        <requirement type="package" version="3.0.5">bioconductor-clusterProfiler</requirement>
+        <requirement type="package" version="1.10">r-BBmisc</requirement>
+        <requirement type="package" version="3.3.0">bioconductor-org.Hs.eg.db</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" level="fatal" />

From 29275a3cb0299e044247cf43e9cc828964d1c72e Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:56:17 +0100
Subject: [PATCH 09/27] renamed and modified wrapper to compile network with
 STRINGdb which now calls the r script directly (instead of calling r script
 from MultiPEN)

---
 ...ringDBNetwork.xml => Rscript-STRINGdb-network.xml} | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
 rename tools/MultiPEN/{MultiPEN-StringDBNetwork.xml => Rscript-STRINGdb-network.xml} (64%)

diff --git a/tools/MultiPEN/MultiPEN-StringDBNetwork.xml b/tools/MultiPEN/Rscript-STRINGdb-network.xml
similarity index 64%
rename from tools/MultiPEN/MultiPEN-StringDBNetwork.xml
rename to tools/MultiPEN/Rscript-STRINGdb-network.xml
index 6d74a525..f209ae81 100644
--- a/tools/MultiPEN/MultiPEN-StringDBNetwork.xml
+++ b/tools/MultiPEN/Rscript-STRINGdb-network.xml
@@ -1,16 +1,17 @@
-<tool id="MultiPEN-StringDBNetwork" name="StringDB Network" version="0.0.3">
+<tool id="STRINGdb-network" name="STRINGdb-network" version="0.0.3">
     <description> Compiles network from list of genes</description>
     <requirements>
-        <requirement type="package" version="0.0.3">MultiPEN</requirement>
+        <requirement type="package" version="3.3.1">r</requirement>
+        <requirement type="package" version="3.0.5">bioconductor-STRINGdb</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" level="fatal" />
     </stdio>
     <command><![CDATA[
-run_MultiPEN_slurm.sh StringDBNetwork ./ '$geneList' $SpeciesCode $threshold '$networkFileName'
+        Rscript '$__tool_directory__'/StringDBNetwork ./ '$geneList' $SpeciesCode $threshold '$networkFileName'
     ]]></command>
     <inputs>
-        <param name="geneList" type="data" format="tabular" label="list of genes" help="List of genes (use symbol names for genes) to compile a network" />
+        <param name="geneList" type="data" format="tabular" label="list of genes" help="List of genes (use symbol gene names) to compile a network" />
         <param name="SpeciesCode" type="integer" value="" label="Code for your species (i.e., 9606 for humans)" help="Use symbol names for genes to compile a network" />
         <param name="threshold" type="float" value="0.60" label="threshold for the combined score for the interactions" help="Default value set to 0.60" />
     </inputs>
@@ -24,7 +25,7 @@ run_MultiPEN_slurm.sh StringDBNetwork ./ '$geneList' $SpeciesCode $threshold '$n
         </test>
     </tests>
     <help><![CDATA[
-MultiPEN includes a Wrapper to use the R package STRINGdb (bioconductor): Search Tool for the Retrieval of Interacting Proteins database (see http://bioconductor.org/packages/release/bioc/html/STRINGdb.html).
+        MultiPEN includes a Wrapper to use the R package STRINGdb (bioconductor): Search Tool for the Retrieval of Interacting Proteins database (see http://bioconductor.org/packages/release/bioc/html/STRINGdb.html).
     ]]></help>
     <citations>
         <citation type="doi">10.1093/nar/gks1094</citation>

From e887d3ad86bc62c0562780398cb5be70bd54681b Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:56:55 +0100
Subject: [PATCH 10/27] r script to use STRINGdb to compile a network from a
 list of genes

---
 tools/MultiPEN/compileNetworkStringDB.R | 80 +++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 tools/MultiPEN/compileNetworkStringDB.R

diff --git a/tools/MultiPEN/compileNetworkStringDB.R b/tools/MultiPEN/compileNetworkStringDB.R
new file mode 100644
index 00000000..4ee2a649
--- /dev/null
+++ b/tools/MultiPEN/compileNetworkStringDB.R
@@ -0,0 +1,80 @@
+# Compile a protein interaction network for a list of genes with STRINGdb.
+#
+# Inputs (command-line arguments, all four are required):
+#   fileName         tab-delimited file with a header row and a column 'name'
+#   speciesCode      numeric species code, e.g. 9606 for homo sapiens
+#   threshold        edges are kept when combined_score/1000 is above this value
+#   networkFileName  file the edges are written to
+# Output:
+#   networkFileName  tab-delimited table with columns: source, target, score
+#
+# To run from a terminal use following command:
+# Rscript compileNetworkStringDB.R 'genes.txt' 9606 0.6 'network.txt'
+
+# Input arguments
+args <- commandArgs(trailingOnly=TRUE)
+
+# User must provide exactly four arguments
+if (length(args)!=4) {
+  stop("Please specify file name, species code, threshold and the name of the network", call.=FALSE)
+}
+
+fileName <- args[1]
+speciesCode <- as.numeric(args[2])
+threshold <- as.numeric(args[3])
+networkFileName <- args[4]
+if (is.na(speciesCode) || is.na(threshold)) {
+  stop("Species code and threshold must be numeric", call.=FALSE)
+}
+
+# Read data, which needs to have (at least) the column 'name'
+inputData <- read.delim(fileName, header = TRUE, sep = '\t', stringsAsFactors = FALSE)
+if (!("name" %in% colnames(inputData))) {
+  stop("The input file must contain a column called 'name'", call.=FALSE)
+}
+
+#### begin compiling network ####
+library(STRINGdb)
+# NOTE(review): combined_score is divided by 1000 below, so STRINGdb presumably works on a
+# 0-1000 scale; score_threshold would then need threshold*1000 to pre-filter - confirm.
+string_db <- STRINGdb$new( version="10", species = speciesCode, score_threshold=threshold, input_directory="" )
+mapped <- string_db$map( inputData,  "name", removeUnmappedRows = TRUE )
+
+#get interactions
+inter <- string_db$get_interactions(mapped$STRING_id)
+
+# STRING ids look like speciesCode.ENSPxxxxx. Strip only that leading prefix: the
+# pattern is anchored and the dot escaped, so a run of digits inside the protein
+# id that happens to match the species code is left untouched.
+prefix <- paste0("^", format(speciesCode, scientific = FALSE), "\\.")
+from <- sub(prefix, "", inter$from)
+to <- sub(prefix, "", inter$to)
+
+#normalise combined_score values: divide by 1000
+network <- data.frame(from = from, to = to, score = inter$combined_score/1000,
+                      stringsAsFactors = FALSE)
+subNetwork <- network[network$score > threshold,]
+
+#edit STRING_id (speciesCode.ENSPxxxxx) to remove speciesCode
+mapped$StringID <- sub(prefix, "", mapped$STRING_id)
+mapped$STRING_id <- NULL
+#### end compiling network ####
+
+#### network with gene names ####
+# Translate both end points of every edge back to the gene name. match() is
+# vectorised, copes with an empty network and uses the first name when several
+# input names were mapped to the same STRING id.
+edges <- data.frame(source = mapped$name[match(subNetwork$from, mapped$StringID)],
+                    target = mapped$name[match(subNetwork$to, mapped$StringID)],
+                    score = subNetwork$score,
+                    stringsAsFactors = FALSE)
+
+#write the network (one edge per row) to file
+cat(sprintf('\nSaving network (edges) to file: %s', networkFileName))
+cat('. . .')
+write.table(edges, networkFileName, sep = '\t', col.names = TRUE, row.names = FALSE, quote = FALSE)
+cat(sprintf('Done!\n'))
+
+# NOTE: for debugging, 'mapped' and 'subNetwork' can be saved with write.table() as well
+
+

From bb9068dab068580f2b90336d8ef51ac4c5719b7c Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 18:57:52 +0100
Subject: [PATCH 11/27] renamed and modified wrapper to compile network with
 STRINGdb which now calls the r script directly (instead of calling r script
 from MultiPEN)

---
 .../Rscript-network-from-STRINGdb.xml         | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100755 tools/MultiPEN/Rscript-network-from-STRINGdb.xml

diff --git a/tools/MultiPEN/Rscript-network-from-STRINGdb.xml b/tools/MultiPEN/Rscript-network-from-STRINGdb.xml
new file mode 100755
index 00000000..91c83772
--- /dev/null
+++ b/tools/MultiPEN/Rscript-network-from-STRINGdb.xml
@@ -0,0 +1,39 @@
+<tool id="STRINGdb-network" name="STRINGdb-network" version="0.0.3">
+    <description> Compiling network from list of genes </description>
+    <requirements>
+        <requirement type="package" version="3.3.1">r</requirement>
+        <requirement type="package" version="3.0.5">bioconductor-STRINGdb</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+    <command><![CDATA[
+        Rscript '$__tool_directory__'/compileNetworkStringDB.R '$geneList' '$speciesCode' '$threshold' 'networkFileName'
+    ]]></command>
+    <inputs>
+        <param name="rankings" type="data" format="tabular" label="Rankings for all features" help="Ranking of genes which must be specified in a tab delimited text file with (at least) three columns: 'name' (use symbol gene names, 'value' (used to rank the features) and 'ranking' (ranking 1 represent the most important feature)." />
+    </inputs>
+    <outputs>
+        <data name="enrichment-GO" format="txt" label="${tool.name}: over-representation analysis with Gene Ontology" from_work_dir="enrichment-GO.txt" />
+        <data name="enrichment-GO_BP" format="pdf" label="${tool.name}: over-representation analysis with Gene Ontology - Biological Processes" from_work_dir="enrichment-GO_BP.pdf" />
+        <data name="enrichment-GO_MF" format="pdf" label="${tool.name}: over-representation analysis with Gene Ontology - Molecular Functions" from_work_dir="enrichment-GO_MF.pdf" />
+        <data name="enrichment-GO_CC" format="pdf" label="${tool.name}: over-representation analysis with Gene Ontology - Cellular Components" from_work_dir="enrichment-GO_CC.pdf" />
+        <data name="gse-GO" format="txt" label="Gene set enrichment analysis with Gene Ontology" from_work_dir="gse-GO.txt" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="rankings" ftype="tabular" value="MultiPEN-Rankings_lambda0.0001.txt" />
+            <output name="enrichment-GO" file="enrichment-GO" ftype="txt" />
+            <output name="enrichment-GO_BP" file="enrichment-GO_BP" ftype="pdf" />
+            <output name="enrichment-GO_MF" file="enrichment-GO_MF" ftype="pdf" />
+            <output name="enrichment-GO_CC" file="enrichment-GO_CC" ftype="pdf" />
+            <output name="gse-GO" file="gse-GO" ftype="txt" />
+        </test>
+    </tests>
+    <help><![CDATA[
+MultiPEN includes a Wrapper to use the R package clusterProfiler to perform over-representation analysis with Gene Ontology.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1089/omi.2011.0118</citation>
+    </citations>
+</tool>

From cd6c71dbd1e47e69b3d889851762adbf6b33651f Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 19:02:37 +0100
Subject: [PATCH 12/27] removed duplicated wrapper

---
 .../Rscript-network-from-STRINGdb.xml         | 39 -------------------
 1 file changed, 39 deletions(-)
 delete mode 100755 tools/MultiPEN/Rscript-network-from-STRINGdb.xml

diff --git a/tools/MultiPEN/Rscript-network-from-STRINGdb.xml b/tools/MultiPEN/Rscript-network-from-STRINGdb.xml
deleted file mode 100755
index 91c83772..00000000
--- a/tools/MultiPEN/Rscript-network-from-STRINGdb.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-<tool id="STRINGdb-network" name="STRINGdb-network" version="0.0.3">
-    <description> Compiling network from list of genes </description>
-    <requirements>
-        <requirement type="package" version="3.3.1">r</requirement>
-        <requirement type="package" version="3.0.5">bioconductor-STRINGdb</requirement>
-    </requirements>
-    <stdio>
-        <exit_code range="1:" level="fatal" />
-    </stdio>
-    <command><![CDATA[
-        Rscript '$__tool_directory__'/compileNetworkStringDB.R '$geneList' '$speciesCode' '$threshold' 'networkFileName'
-    ]]></command>
-    <inputs>
-        <param name="rankings" type="data" format="tabular" label="Rankings for all features" help="Ranking of genes which must be specified in a tab delimited text file with (at least) three columns: 'name' (use symbol gene names, 'value' (used to rank the features) and 'ranking' (ranking 1 represent the most important feature)." />
-    </inputs>
-    <outputs>
-        <data name="enrichment-GO" format="txt" label="${tool.name}: over-representation analysis with Gene Ontology" from_work_dir="enrichment-GO.txt" />
-        <data name="enrichment-GO_BP" format="pdf" label="${tool.name}: over-representation analysis with Gene Ontology - Biological Processes" from_work_dir="enrichment-GO_BP.pdf" />
-        <data name="enrichment-GO_MF" format="pdf" label="${tool.name}: over-representation analysis with Gene Ontology - Molecular Functions" from_work_dir="enrichment-GO_MF.pdf" />
-        <data name="enrichment-GO_CC" format="pdf" label="${tool.name}: over-representation analysis with Gene Ontology - Cellular Components" from_work_dir="enrichment-GO_CC.pdf" />
-        <data name="gse-GO" format="txt" label="Gene set enrichment analysis with Gene Ontology" from_work_dir="gse-GO.txt" />
-    </outputs>
-    <tests>
-        <test>
-            <param name="rankings" ftype="tabular" value="MultiPEN-Rankings_lambda0.0001.txt" />
-            <output name="enrichment-GO" file="enrichment-GO" ftype="txt" />
-            <output name="enrichment-GO_BP" file="enrichment-GO_BP" ftype="pdf" />
-            <output name="enrichment-GO_MF" file="enrichment-GO_MF" ftype="pdf" />
-            <output name="enrichment-GO_CC" file="enrichment-GO_CC" ftype="pdf" />
-            <output name="gse-GO" file="gse-GO" ftype="txt" />
-        </test>
-    </tests>
-    <help><![CDATA[
-MultiPEN includes a Wrapper to use the R package clusterProfiler to perform over-representation analysis with Gene Ontology.
-    ]]></help>
-    <citations>
-        <citation type="doi">10.1089/omi.2011.0118</citation>
-    </citations>
-</tool>

From 48c79d1833c2e237fb989ba593303b81897d71a0 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Sat, 8 Apr 2017 19:04:51 +0100
Subject: [PATCH 13/27] edit Rscript-STRINGdb-network.xml file: minor edits to
 help section

---
 tools/MultiPEN/Rscript-STRINGdb-network.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/MultiPEN/Rscript-STRINGdb-network.xml b/tools/MultiPEN/Rscript-STRINGdb-network.xml
index f209ae81..9ee533b3 100644
--- a/tools/MultiPEN/Rscript-STRINGdb-network.xml
+++ b/tools/MultiPEN/Rscript-STRINGdb-network.xml
@@ -25,7 +25,7 @@
         </test>
     </tests>
     <help><![CDATA[
-        MultiPEN includes a Wrapper to use the R package STRINGdb (bioconductor): Search Tool for the Retrieval of Interacting Proteins database (see http://bioconductor.org/packages/release/bioc/html/STRINGdb.html).
+        This wrapper compiles a PP network from a list of genes, using the R package STRINGdb (bioconductor): Search Tool for the Retrieval of Interacting Proteins database (see http://bioconductor.org/packages/release/bioc/html/STRINGdb.html).
     ]]></help>
     <citations>
         <citation type="doi">10.1093/nar/gks1094</citation>

From 09e2f2f56e0051a801b8b58b2371a37bf66146c0 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Mon, 10 Apr 2017 11:44:51 +0100
Subject: [PATCH 14/27] edited Rscript-enrichment-GO.xml to add references to
 gene ontology

---
 tools/MultiPEN/Rscript-enrichment-GO.xml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/MultiPEN/Rscript-enrichment-GO.xml b/tools/MultiPEN/Rscript-enrichment-GO.xml
index b05e203c..f82e1528 100755
--- a/tools/MultiPEN/Rscript-enrichment-GO.xml
+++ b/tools/MultiPEN/Rscript-enrichment-GO.xml
@@ -1,5 +1,5 @@
 <tool id="enrichGO" name="enrichGO" version="0.0.3">
-    <description> (enrichment with Gene Ontology)</description>
+    <description> over-representation and GSE analysis with Gene Ontology</description>
     <requirements>
         <requirement type="package" version="3.3.1">r</requirement>
         <requirement type="package" version="3.0.5">bioconductor-clusterProfiler</requirement>
@@ -14,7 +14,7 @@
         Rscript '$__tool_directory__'/enrichmentGO.R '$rankings'
     ]]></command>
     <inputs>
-        <param name="rankings" type="data" format="tabular" label="Rankings for all features" help="Ranking of genes which must be specified in a tab delimited text file with (at least) three columns: 'name' (use symbol gene names, 'value' (used to rank the features) and 'ranking' (ranking 1 represent the most important feature)." />
+        <param name="rankings" type="data" format="tabular" label="Rankings for all features" help="Ranking of genes which must be specified in a tab delimited text file with (at least) three columns: 'name' (this is the gene names), 'value' (used to rank the genes) and 'ranking' (where ranking 1 represent the most important feature)." />
     </inputs>
     <outputs>
         <data name="enrichment-GO" format="txt" label="${tool.name}: over-representation analysis with Gene Ontology" from_work_dir="enrichment-GO.txt" />
@@ -34,9 +34,12 @@
         </test>
     </tests>
     <help><![CDATA[
-MultiPEN includes a Wrapper to use the R package clusterProfiler to perform over-representation analysis with Gene Ontology.
+        This wrapper performs over-representation and gene set enrichment analysis from a list of genes using the bioconductor R package clusterProfiler and Gene Ontology.
     ]]></help>
     <citations>
         <citation type="doi">10.1089/omi.2011.0118</citation>
+        <citation type="doi">10.1038/75556</citation>
+        <citation type="doi">10.1093/nar/gku1179</citation>
+        
     </citations>
 </tool>

From ea7f00c5bf484f1c116c3cc366e3de64fd012d8d Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Mon, 10 Apr 2017 12:05:56 +0100
Subject: [PATCH 15/27] edited Rscript-enrichment-GO.xml to add references to
 KEGG

---
 tools/MultiPEN/Rscript-enrichment-KEGG.xml | 23 ++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/tools/MultiPEN/Rscript-enrichment-KEGG.xml b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
index b092dce7..b6ea674d 100644
--- a/tools/MultiPEN/Rscript-enrichment-KEGG.xml
+++ b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
@@ -1,5 +1,5 @@
-<tool id="MultiPEN-enrichment-KEGG" name="Enrichment with KEGG" version="0.0.3">
-    <description> (enrichment with KEGG)</description>
+<tool id="enrichKEGG" name="enrichKEGG" version="0.0.3">
+    <description> over-representation and GSE analysis with KEGG</description>
     <requirements>
         <requirement type="package" version="3.3.1">r</requirement>
         <requirement type="package" version="3.0.5">bioconductor-clusterProfiler</requirement>
@@ -13,7 +13,7 @@
         Rscript '$__tool_directory__'/enrichmentKEGG.R '$rankings'
     ]]></command>
     <inputs>
-        <param name="rankings" type="data" format="tabular" label="Rankings for all features" help="Ranking of features (genes and/or metabolites). The rankings must be specified in a tab delimited text file with (at least) three columns: 'name' (use symbol names for genes or chEBI IDs for metabolites), 'value' (used to rank the features) and 'ranking' (ranking 1 represent the most important feature)" />
+        <param name="rankings" type="data" format="tabular" label="Rankings for all features" help="Ranking of genes which should be specified in a tabular delimited text file with (at least) three columns: 'name' (this are gene names), 'value' (used to rank the genes) and 'ranking' (where ranking 1 represent the most important feature)" />
     </inputs>
     <outputs>
         <data name="enrichment-KEGG" format="txt" label="${tool.name}: over-representation analysis with KEGG" from_work_dir="enrichment-KEGG.txt" />
@@ -29,9 +29,24 @@
         </test>
     </tests>
     <help><![CDATA[
-MultiPEN includes a Wrapper to use the R package ClusterProfiler [Yu et al., 2012] to perform over-representation analysis with KEGG.
+        This wrapper performs over-representation and gene set enrichment analysis from a list of genes using the bioconductor R package clusterProfiler and KEGG.
     ]]></help>
     <citations>
         <citation type="doi">10.1089/omi.2011.0118</citation>
+        <citation type="doi">10.1093/nar/gkw1092</citation>
+        <citation type="doi">10.1093/nar/gkv1070</citation>
+        <citation type="bibtex">@article{Kanehisa2000,
+            Author = {Kanehisa, M and Goto, S},
+            Crdt = {1999/12/11 09:00},
+            Date = {2000 Jan 01},
+            Journal = {Nucleic Acids Res},
+            Month = {Jan},
+            Number = {1},
+            Oid = {NLM: PMC102409},
+            Pages = {27--30},
+            Title = {KEGG: kyoto encyclopedia of genes and genomes.},
+            Volume = {28},
+            Year = {2000}}
+            </citation>
     </citations>
 </tool>

From 5dbe86b3da50559818f5e15b2ae0b9a67657c6e8 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Mon, 10 Apr 2017 12:22:52 +0100
Subject: [PATCH 16/27] edited Rscript-STRINGdb-network.xml to expand help on
 input for species code

---
 tools/MultiPEN/Rscript-STRINGdb-network.xml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/MultiPEN/Rscript-STRINGdb-network.xml b/tools/MultiPEN/Rscript-STRINGdb-network.xml
index 9ee533b3..a39ed369 100644
--- a/tools/MultiPEN/Rscript-STRINGdb-network.xml
+++ b/tools/MultiPEN/Rscript-STRINGdb-network.xml
@@ -1,5 +1,5 @@
 <tool id="STRINGdb-network" name="STRINGdb-network" version="0.0.3">
-    <description> Compiles network from list of genes</description>
+    <description> Compile network from list of genes</description>
     <requirements>
         <requirement type="package" version="3.3.1">r</requirement>
         <requirement type="package" version="3.0.5">bioconductor-STRINGdb</requirement>
@@ -8,11 +8,11 @@
         <exit_code range="1:" level="fatal" />
     </stdio>
     <command><![CDATA[
-        Rscript '$__tool_directory__'/StringDBNetwork ./ '$geneList' $SpeciesCode $threshold '$networkFileName'
+        Rscript '$__tool_directory__'/compileNetworkStringDB.R ./ '$geneList' $SpeciesCode $threshold '$networkFileName'
     ]]></command>
     <inputs>
         <param name="geneList" type="data" format="tabular" label="list of genes" help="List of genes (use symbol gene names) to compile a network" />
-        <param name="SpeciesCode" type="integer" value="" label="Code for your species (i.e., 9606 for humans)" help="Use symbol names for genes to compile a network" />
+        <param name="SpeciesCode" type="integer" value="" label="NCBI taxonomy identifiers for your organism (i.e., 9606 for Human, 10090 for mouse)" help="If you don’t know your organism's identifier, you can search the NCBI Taxonomy from http://www.nlm.nih.gov/taxonomy"/>
         <param name="threshold" type="float" value="0.60" label="threshold for the combined score for the interactions" help="Default value set to 0.60" />
     </inputs>
     <outputs>
@@ -25,7 +25,7 @@
         </test>
     </tests>
     <help><![CDATA[
-        This wrapper compiles a PP network from a list of genes, using the R package STRINGdb (bioconductor): Search Tool for the Retrieval of Interacting Proteins database (see http://bioconductor.org/packages/release/bioc/html/STRINGdb.html).
+        This wrapper compiles a PPI network from a list of genes, using the bioconductor R package STRINGdb: Search Tool for the Retrieval of Interacting Proteins database (see http://bioconductor.org/packages/release/bioc/html/STRINGdb.html).
     ]]></help>
     <citations>
         <citation type="doi">10.1093/nar/gks1094</citation>

From a2b6afab130c7c2f900b993694637b8657aae6a9 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Mon, 10 Apr 2017 12:31:37 +0100
Subject: [PATCH 17/27] edited compileNetworkStringDB.R: added licence
 information, references, description of inputs and how to run Rscript

---
 tools/MultiPEN/compileNetworkStringDB.R | 135 +++++++++++++-----------
 1 file changed, 72 insertions(+), 63 deletions(-)

diff --git a/tools/MultiPEN/compileNetworkStringDB.R b/tools/MultiPEN/compileNetworkStringDB.R
index 4ee2a649..6b5545bf 100644
--- a/tools/MultiPEN/compileNetworkStringDB.R
+++ b/tools/MultiPEN/compileNetworkStringDB.R
@@ -1,80 +1,89 @@
-# getStringInteractome <- function(fileName, speciesCode, speciesName, networkFileName){
-  # Get StringInteractome Network 
-  
-  # Load file with list of genes
-  
-  #INPUT 
-  # fileName - table with column 'name'
-  # speciesCode = 9606  #homo sapiens
-  
-  # networkFileName = "SI_network.human.NormalisedExpressionLevels.csv"
+# Script to compile a Protein-Protein Interaction network using STRINGdb: 
+# "STRINGdb (Search Tool for the Retrieval of Interacting proteins database)"
+#    Franceschini A, et al. (2013). “STRING v9.1: protein-protein interaction networks, with increased coverage and integration.” Nucleic Acids Research, 41 (Database issue). doi:10.1093/nar/gks1094
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#Inputs: 
+# fileName - table with column 'name'
+# speciesCode = 9606  #homo sapiens
+# threshold = minimum combined score
+# networkFileName = "SI_network.human.NormalisedExpressionLevels.csv"
   
+# It requires the following R packages:
+# STRINGdb, http://bioconductor.org/packages/release/bioc/html/STRINGdb.html
+#
+# To run script from a terminal use the command:
+# Rscript compileNetworkStringDB.R 'path-to-directory/fileName.txt' speciesCode threshold 'path-to-output-folder/networkFileName.txt'
+
+
 
 # Input arguments
 args = commandArgs(trailingOnly=TRUE)
 
-# User must provide at least an input file, and optionally the output directory 
-length(args)
-args
+# User must provide all four input parameters 
 if (length(args)!=4) {
   stop("Please specify file name, species code, threshold and the name of the network", call.=FALSE)
 }
 
 fileName <- args[1]
-class(args[2])
-class(as.numeric(args[2]))
 speciesCode <- as.numeric(args[2]);
 threshold <- as.numeric(args[3]);
 networkFileName <- args[4];
 
 
-  # Read data, which needs to have at least the following two columns: [gene_id, shortName]
-  inputData <- read.delim( fileName, header = TRUE, sep = '\t', stringsAsFactors = FALSE)
-  #geneList <- inputData$name
-  
-  # begin compiling network
-  library(STRINGdb)
-  string_db <- STRINGdb$new( version="10", species = speciesCode, score_threshold=threshold, input_directory="" )
-  mapped <- string_db$map( inputData,  "name", removeUnmappedRows = TRUE )
-  
-  #get interactions 
-  inter<-string_db$get_interactions(mapped$STRING_id)
-  
-  #annotate source and target nodes
-  s <- paste(speciesCode, '.', sep = "")
-  from <- gsub(s, "", inter$from)
-  to <- gsub(s,"",inter$to)
-  #normalise combined_score values: divide by 1000
-  network <- data.frame(from = from, to = to, score = inter$combined_score/1000)
-  subNetwork <- network[network$score > threshold,] 
-  
-  #edit STRING_id (speciesCode.ENSPxxxxx) to remove speciesCode
-  mapped$StringID <- gsub(s, "", mapped$STRING_id)
-  mapped$STRING_id <- NULL
-  
-  # end compiling network
-  
-  #### network with gene names ####
-  nn <- dim(subNetwork)[1]
-  interactions <- matrix(data=NA,nrow=dim(subNetwork)[1], ncol=3)
-  for(ii in 1:nn){
-    interactions[ii,1] = mapped$name[mapped$StringID==subNetwork$from[ii]]
-    interactions[ii,2] = mapped$name[mapped$StringID==subNetwork$to[ii]]
-    interactions[ii,3] = subNetwork$score[ii]
-  }
-  
-  edges <- data.frame(source = interactions[,1], target = interactions[,2], score = interactions[,3])
-  
-  #write two files to run with GenePEN
-  cat(sprintf('\nSaving network (edges) to file: %s', networkFileName))
-  cat('. . .')
-  #fileName <- paste(networkFileName, '.txt', sep = "")
-  write.table(edges, networkFileName, sep = '\t', col.names = T, row.names = FALSE, quote = FALSE)
-  cat(sprintf('Done!'))
-  
-  
-  #write.table(mapped, '~/Desktop/temp/mapped.txt', sep = '\t', col.names = T, row.names = FALSE, quote = FALSE)
-  #write.table(subNetwork, '~/Desktop/temp/subnetwork.txt', sep = '\t', col.names = T, row.names = FALSE, quote = FALSE)
-#}
+# Read data: a tab-delimited table with a header row and at least a column called 'name' (the genes to map)
+inputData <- read.delim( fileName, header = TRUE, sep = '\t', stringsAsFactors = FALSE)
+
+
+#### begin compiling network ####
+library(STRINGdb)
+string_db <- STRINGdb$new( version="10", species = speciesCode, score_threshold=threshold, input_directory="" )
+mapped <- string_db$map( inputData,  "name", removeUnmappedRows = TRUE )
+
+#get interactions 
+inter<-string_db$get_interactions(mapped$STRING_id)
+
+#annotate source and target nodes
+s <- paste(speciesCode, '.', sep = "")
+from <- gsub(s, "", inter$from)
+to <- gsub(s,"",inter$to)
+#normalise combined_score values: divide by 1000
+network <- data.frame(from = from, to = to, score = inter$combined_score/1000)
+subNetwork <- network[network$score > threshold,] 
+
+#edit STRING_id (speciesCode.ENSPxxxxx) to remove speciesCode
+mapped$StringID <- gsub(s, "", mapped$STRING_id)
+mapped$STRING_id <- NULL
+
+
+
+#### network with gene names ####
+nn <- dim(subNetwork)[1]
+interactions <- matrix(data=NA,nrow=dim(subNetwork)[1], ncol=3)
+for(ii in 1:nn){
+  interactions[ii,1] = mapped$name[mapped$StringID==subNetwork$from[ii]]
+  interactions[ii,2] = mapped$name[mapped$StringID==subNetwork$to[ii]]
+  interactions[ii,3] = subNetwork$score[ii]
+}
+
+edges <- data.frame(source = interactions[,1], target = interactions[,2], score = interactions[,3])
 
+#write the network (edge list: source, target, score) to a single tab-delimited file, for use with GenePEN
+cat(sprintf('\nSaving network (edges) to file: %s', networkFileName))
+cat('. . .')
+#fileName <- paste(networkFileName, '.txt', sep = "")
+write.table(edges, networkFileName, sep = '\t', col.names = T, row.names = FALSE, quote = FALSE)
+cat(sprintf('Done!'))
 

From 2c1a637104d458c827ecf72e566e1d316d5ee328 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Mon, 17 Apr 2017 11:40:55 +0100
Subject: [PATCH 18/27] modified IDs to be lower case

---
 tools/MultiPEN/Rscript-STRINGdb-network.xml | 2 +-
 tools/MultiPEN/Rscript-enrichment-GO.xml    | 2 +-
 tools/MultiPEN/Rscript-enrichment-KEGG.xml  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/MultiPEN/Rscript-STRINGdb-network.xml b/tools/MultiPEN/Rscript-STRINGdb-network.xml
index a39ed369..73c5a45a 100644
--- a/tools/MultiPEN/Rscript-STRINGdb-network.xml
+++ b/tools/MultiPEN/Rscript-STRINGdb-network.xml
@@ -1,4 +1,4 @@
-<tool id="STRINGdb-network" name="STRINGdb-network" version="0.0.3">
+<tool id="stringdb network" name="stringdb-network" version="0.0.3">
     <description> Compile network from list of genes</description>
     <requirements>
         <requirement type="package" version="3.3.1">r</requirement>
diff --git a/tools/MultiPEN/Rscript-enrichment-GO.xml b/tools/MultiPEN/Rscript-enrichment-GO.xml
index f82e1528..c316d8d6 100755
--- a/tools/MultiPEN/Rscript-enrichment-GO.xml
+++ b/tools/MultiPEN/Rscript-enrichment-GO.xml
@@ -1,4 +1,4 @@
-<tool id="enrichGO" name="enrichGO" version="0.0.3">
+<tool id="enrich go" name="enrich go" version="0.0.3">
     <description> over-representation and GSE analysis with Gene Ontology</description>
     <requirements>
         <requirement type="package" version="3.3.1">r</requirement>
diff --git a/tools/MultiPEN/Rscript-enrichment-KEGG.xml b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
index b6ea674d..9a25184d 100644
--- a/tools/MultiPEN/Rscript-enrichment-KEGG.xml
+++ b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
@@ -1,4 +1,4 @@
-<tool id="enrichKEGG" name="enrichKEGG" version="0.0.3">
+<tool id="enrich kegg" name="enrich kegg" version="0.0.3">
     <description> over-representation and GSE analysis with KEGG</description>
     <requirements>
         <requirement type="package" version="3.3.1">r</requirement>

From 43b263f7be315a9c315ab990d4f976d9d47ea6f2 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Mon, 17 Apr 2017 18:45:44 +0100
Subject: [PATCH 19/27] modified MultiPEN-feature-selectiom.xml to include
 string with optional parameters for log2 transform, z-scores, decision
 threshold, max number of iterations

---
 tools/MultiPEN/MultiPEN-feature-selection.xml | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tools/MultiPEN/MultiPEN-feature-selection.xml b/tools/MultiPEN/MultiPEN-feature-selection.xml
index cc6f4d8a..87b7f2dc 100644
--- a/tools/MultiPEN/MultiPEN-feature-selection.xml
+++ b/tools/MultiPEN/MultiPEN-feature-selection.xml
@@ -7,10 +7,7 @@
         <exit_code range="1:" level="fatal" />
     </stdio>
     <command><![CDATA[
-run_MultiPEN_slurm.sh FeatureSelection ./ '$ExpressionData' '$Interactions' '$SampleClass' $lambda $DecisionThreshold
-#if str($MaxIter)
-    $MaxIter
-#end if
+run_MultiPEN_slurm.sh FeatureSelection ./ '$ExpressionData' '$Interactions' '$SampleClass' $lambda $optionalParameters
 &&
 mv MultiPEN-performance_feature-selection_lambda${lambda}.txt Performance.txt &&
 mv MultiPEN-Rankings_lambda${lambda}.txt Rankings.txt &&
@@ -23,8 +20,7 @@ mv MultiPEN-Rankings_lambda${lambda}_genes-higher-in-control.txt Rankings-higher
         <param name="Interactions" type="data" format="tabular" label="Interaction Network" help="Molecular interaction network, where every interaction is defined by three elements: the source  node (name of gene and/or metabolite), the target node (name of gene and/or metabolite) and the weight for the interaction (a real number in the range [0,1]), i.e., the confidence level. The interaction matrix is provided as a tabular file with three columns: source, target and weight, and where each row corresponds to an interaction."  />
         <param name="SampleClass" type="data" format="txt" label="SampleClass for the samples" help="Tabular file with two columns: sample and class. The sample column contains the names of all samples, which corresponds to the columns in 'ExpressionData'. The class for each sample is 1 for cases and 0 for control" />
         <param name="lambda" type="float" value="" label="Lambda for the optimisation function" help="" />
-        <param name="DecisionThreshold" type="float" value="0.5" label="Decision threshold for classification" help="Optional parameter, the default value is 0.5" />
-        <param name="MaxIter" type="integer" value="100" optional="true" label="Maximum number of iterations for optimisation" help="Optional parameter, the default value is 100" />
+        <param name="optionalParameters" type="string" value="000.500100" label="optional parameters" help="The digits of this string represent a sequence of parameters. The following triples define the position in the string, the parameter name and its possible values (i.e., the left most value in the string states whether the log2 transform has been used or not): {1, log2 transform, [0,1]}, {2, z-scores, [0,1]}, {3 to 6, decision threshold, e.g.: 0.60}, {7 to 10, max number of iterations, e.g.: 0300 (default value is set to 0100)}. For example, the default string shown, 000.500100, states that no log 2 transformed is used, no z-scores are used, decision threshold is 0.50 and a maximum of 100 iterations are used." />
     </inputs>
     <outputs>
         <data name="Config" format="txt" label="${tool.name} on ${on_string} (lambda $lambda): Configuration" from_work_dir="MultiPEN-feature-selection_config.txt" />

From 65df36d5e782c7828f40dc039ab504ec843451f3 Mon Sep 17 00:00:00 2001
From: Perla Troncoso Rey <perlatroncosorey@gmail.com>
Date: Mon, 17 Apr 2017 18:52:16 +0100
Subject: [PATCH 20/27] modified MultiPEN-feature-selectiom.xml to include
 string with optional parameters for log2 transform, z-scores, decision
 threshold, max number of iterations; optional string is also added at the end
 of all output file name

---
 tools/MultiPEN/MultiPEN-feature-selection.xml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/MultiPEN/MultiPEN-feature-selection.xml b/tools/MultiPEN/MultiPEN-feature-selection.xml
index 87b7f2dc..5c4d4dc2 100644
--- a/tools/MultiPEN/MultiPEN-feature-selection.xml
+++ b/tools/MultiPEN/MultiPEN-feature-selection.xml
@@ -9,18 +9,18 @@
     <command><![CDATA[
 run_MultiPEN_slurm.sh FeatureSelection ./ '$ExpressionData' '$Interactions' '$SampleClass' $lambda $optionalParameters
 &&
-mv MultiPEN-performance_feature-selection_lambda${lambda}.txt Performance.txt &&
-mv MultiPEN-Rankings_lambda${lambda}.txt Rankings.txt &&
-mv MultiPEN-vts_lambda${lambda}.txt vts.txt &&
-mv MultiPEN-Rankings_lambda${lambda}_genes-higher-in-cases.txt Rankings-higherInCases.txt &&
-mv MultiPEN-Rankings_lambda${lambda}_genes-higher-in-control.txt Rankings-higherInControl.txt
+mv MultiPEN-performance_feature-selection_lambda${lambda}_${optionalParameters}.txt Performance.txt &&
+mv MultiPEN-Rankings_lambda${lambda}_${optionalParameters}.txt Rankings.txt &&
+mv MultiPEN-vts_lambda${lambda}_${optionalParameters}.txt vts.txt &&
+mv MultiPEN-Rankings_lambda${lambda}_genes-higher-in-cases_${optionalParameters}.txt Rankings-higherInCases.txt &&
+mv MultiPEN-Rankings_lambda${lambda}_genes-higher-in-control_${optionalParameters}.txt Rankings-higherInControl.txt
     ]]></command>
     <inputs>
         <param name="ExpressionData" type="data" format="tabular" label="Expression and/or levels for features (genes and/or metabolites)" help="Gene expression and/or metabolite levels" />
         <param name="Interactions" type="data" format="tabular" label="Interaction Network" help="Molecular interaction network, where every interaction is defined by three elements: the source  node (name of gene and/or metabolite), the target node (name of gene and/or metabolite) and the weight for the interaction (a real number in the range [0,1]), i.e., the confidence level. The interaction matrix is provided as a tabular file with three columns: source, target and weight, and where each row corresponds to an interaction."  />
         <param name="SampleClass" type="data" format="txt" label="SampleClass for the samples" help="Tabular file with two columns: sample and class. The sample column contains the names of all samples, which corresponds to the columns in 'ExpressionData'. The class for each sample is 1 for cases and 0 for control" />
         <param name="lambda" type="float" value="" label="Lambda for the optimisation function" help="" />
-        <param name="optionalParameters" type="string" value="000.500100" label="optional parameters" help="The digits of this string represent a sequence of parameters. The following triples define the position in the string, the parameter name and its possible values (i.e., the left most value in the string states whether the log2 transform has been used or not): {1, log2 transform, [0,1]}, {2, z-scores, [0,1]}, {3 to 6, decision threshold, e.g.: 0.60}, {7 to 10, max number of iterations, e.g.: 0300 (default value is set to 0100)}. For example, the default string shown, 000.500100, states that no log 2 transformed is used, no z-scores are used, decision threshold is 0.50 and a maximum of 100 iterations are used." />
+        <param name="optionalParameters" type="string" value="000.500100" label="Optional parameters" help="The digits of this string represent a sequence of parameters. The following triples define the position in the string, the parameter name and its possible values (i.e., the left most value in the string states whether the log2 transform has been used or not): {1, log2 transform, [0,1]}, {2, z-scores, [0,1]}, {3 to 6, decision threshold, e.g.: 0.60}, {7 to 10, max number of iterations, e.g.: 0300 (default value is set to 0100)}. For example, the default string shown, 000.500100, states that no log 2 transformed is used, no z-scores are used, decision threshold is 0.50 and a maximum of 100 iterations are used." />
     </inputs>
     <outputs>
         <data name="Config" format="txt" label="${tool.name} on ${on_string} (lambda $lambda): Configuration" from_work_dir="MultiPEN-feature-selection_config.txt" />

From 19d791441fceac7766e7632a6beba267413f51d9 Mon Sep 17 00:00:00 2001
From: wjurkowski <wiktor.jurkowski@gmail.com>
Date: Mon, 10 Jul 2017 15:40:55 +0100
Subject: [PATCH 21/27] Update Rscript-enrichment-KEGG.xml

---
 tools/MultiPEN/Rscript-enrichment-KEGG.xml | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/tools/MultiPEN/Rscript-enrichment-KEGG.xml b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
index 9a25184d..626e6005 100644
--- a/tools/MultiPEN/Rscript-enrichment-KEGG.xml
+++ b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
@@ -35,18 +35,6 @@
         <citation type="doi">10.1089/omi.2011.0118</citation>
         <citation type="doi">10.1093/nar/gkw1092</citation>
         <citation type="doi">10.1093/nar/gkv1070</citation>
-        <citation type="bibtex">@article{Kanehisa2000,
-            Author = {Kanehisa, M and Goto, S},
-            Crdt = {1999/12/11 09:00},
-            Date = {2000 Jan 01},
-            Journal = {Nucleic Acids Res},
-            Month = {Jan},
-            Number = {1},
-            Oid = {NLM: PMC102409},
-            Pages = {27--30},
-            Title = {KEGG: kyoto encyclopedia of genes and genomes.},
-            Volume = {28},
-            Year = {2000}}
-            </citation>
+        <citation type="doi">10.1093/nar/28.1.27</citation>
     </citations>
 </tool>

From d78173766a101eb8d30f4528243543e1aaa5f181 Mon Sep 17 00:00:00 2001
From: wjurkowski <wiktor.jurkowski@gmail.com>
Date: Mon, 10 Jul 2017 17:46:52 +0100
Subject: [PATCH 22/27] Update MultiPEN-feature-selection.xml

---
 tools/MultiPEN/MultiPEN-feature-selection.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/MultiPEN/MultiPEN-feature-selection.xml b/tools/MultiPEN/MultiPEN-feature-selection.xml
index 5c4d4dc2..f63001bf 100644
--- a/tools/MultiPEN/MultiPEN-feature-selection.xml
+++ b/tools/MultiPEN/MultiPEN-feature-selection.xml
@@ -7,7 +7,7 @@
         <exit_code range="1:" level="fatal" />
     </stdio>
     <command><![CDATA[
-run_MultiPEN_slurm.sh FeatureSelection ./ '$ExpressionData' '$Interactions' '$SampleClass' $lambda $optionalParameters
+run_MultiPEN_slurm.sh FeatureSelection ./ '$ExpressionData' '$Interactions' '$SampleClass' $lambda '$optionalParameters'
 &&
 mv MultiPEN-performance_feature-selection_lambda${lambda}_${optionalParameters}.txt Performance.txt &&
 mv MultiPEN-Rankings_lambda${lambda}_${optionalParameters}.txt Rankings.txt &&

From b0b5fb81a2952092f324b00e7211a2fbd732866a Mon Sep 17 00:00:00 2001
From: wjurkowski <wiktor.jurkowski@gmail.com>
Date: Mon, 10 Jul 2017 19:13:02 +0100
Subject: [PATCH 23/27] fixes to R script and MultiPEN wrappers

---
 tools/MultiPEN/MultiPEN-feature-selection.xml | 12 ++++++------
 tools/MultiPEN/Rscript-STRINGdb-network.xml   |  2 +-
 tools/MultiPEN/Rscript-enrichment-GO.xml      |  9 ++++-----
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/tools/MultiPEN/MultiPEN-feature-selection.xml b/tools/MultiPEN/MultiPEN-feature-selection.xml
index f63001bf..e68615a0 100644
--- a/tools/MultiPEN/MultiPEN-feature-selection.xml
+++ b/tools/MultiPEN/MultiPEN-feature-selection.xml
@@ -9,18 +9,18 @@
     <command><![CDATA[
 run_MultiPEN_slurm.sh FeatureSelection ./ '$ExpressionData' '$Interactions' '$SampleClass' $lambda '$optionalParameters'
 &&
-mv MultiPEN-performance_feature-selection_lambda${lambda}_${optionalParameters}.txt Performance.txt &&
-mv MultiPEN-Rankings_lambda${lambda}_${optionalParameters}.txt Rankings.txt &&
-mv MultiPEN-vts_lambda${lambda}_${optionalParameters}.txt vts.txt &&
-mv MultiPEN-Rankings_lambda${lambda}_genes-higher-in-cases_${optionalParameters}.txt Rankings-higherInCases.txt &&
-mv MultiPEN-Rankings_lambda${lambda}_genes-higher-in-control_${optionalParameters}.txt Rankings-higherInControl.txt
+mv 'MultiPEN-performance_feature-selection_lambda${lambda}_${optionalParameters}.txt' Performance.txt &&
+mv 'MultiPEN-Rankings_lambda${lambda}_${optionalParameters}.txt' Rankings.txt &&
+mv 'MultiPEN-vts_lambda${lambda}_${optionalParameters}.txt' vts.txt &&
+mv 'MultiPEN-Rankings_lambda${lambda}_genes-higher-in-cases_${optionalParameters}.txt' Rankings-higherInCases.txt &&
+mv 'MultiPEN-Rankings_lambda${lambda}_genes-higher-in-control_${optionalParameters}.txt' Rankings-higherInControl.txt
     ]]></command>
     <inputs>
         <param name="ExpressionData" type="data" format="tabular" label="Expression and/or levels for features (genes and/or metabolites)" help="Gene expression and/or metabolite levels" />
         <param name="Interactions" type="data" format="tabular" label="Interaction Network" help="Molecular interaction network, where every interaction is defined by three elements: the source  node (name of gene and/or metabolite), the target node (name of gene and/or metabolite) and the weight for the interaction (a real number in the range [0,1]), i.e., the confidence level. The interaction matrix is provided as a tabular file with three columns: source, target and weight, and where each row corresponds to an interaction."  />
         <param name="SampleClass" type="data" format="txt" label="SampleClass for the samples" help="Tabular file with two columns: sample and class. The sample column contains the names of all samples, which corresponds to the columns in 'ExpressionData'. The class for each sample is 1 for cases and 0 for control" />
         <param name="lambda" type="float" value="" label="Lambda for the optimisation function" help="" />
-        <param name="optionalParameters" type="string" value="000.500100" label="Optional parameters" help="The digits of this string represent a sequence of parameters. The following triples define the position in the string, the parameter name and its possible values (i.e., the left most value in the string states whether the log2 transform has been used or not): {1, log2 transform, [0,1]}, {2, z-scores, [0,1]}, {3 to 6, decision threshold, e.g.: 0.60}, {7 to 10, max number of iterations, e.g.: 0300 (default value is set to 0100)}. For example, the default string shown, 000.500100, states that no log 2 transformed is used, no z-scores are used, decision threshold is 0.50 and a maximum of 100 iterations are used." />
+        <param name="optionalParameters" type="text" value="000.500100" label="Optional parameters" help="The digits of this string represent a sequence of parameters. The following triples define the position in the string, the parameter name and its possible values (i.e., the leftmost value in the string states whether the log2 transform has been used or not): {1, log2 transform, [0,1]}, {2, z-scores, [0,1]}, {3 to 6, decision threshold, e.g.: 0.60}, {7 to 10, max number of iterations, e.g.: 0300 (default value is set to 0100)}. For example, the default string shown, 000.500100, states that no log2 transform is applied, no z-scores are used, the decision threshold is 0.50 and a maximum of 100 iterations is used." />
     </inputs>
     <outputs>
         <data name="Config" format="txt" label="${tool.name} on ${on_string} (lambda $lambda): Configuration" from_work_dir="MultiPEN-feature-selection_config.txt" />
diff --git a/tools/MultiPEN/Rscript-STRINGdb-network.xml b/tools/MultiPEN/Rscript-STRINGdb-network.xml
index 73c5a45a..d73584c5 100644
--- a/tools/MultiPEN/Rscript-STRINGdb-network.xml
+++ b/tools/MultiPEN/Rscript-STRINGdb-network.xml
@@ -1,4 +1,4 @@
-<tool id="stringdb network" name="stringdb-network" version="0.0.3">
+<tool id="stringdb_network" name="stringdb-network" version="0.0.3">
     <description> Compile network from list of genes</description>
     <requirements>
         <requirement type="package" version="3.3.1">r</requirement>
diff --git a/tools/MultiPEN/Rscript-enrichment-GO.xml b/tools/MultiPEN/Rscript-enrichment-GO.xml
index c316d8d6..a3c0f12c 100755
--- a/tools/MultiPEN/Rscript-enrichment-GO.xml
+++ b/tools/MultiPEN/Rscript-enrichment-GO.xml
@@ -1,9 +1,8 @@
-<tool id="enrich go" name="enrich go" version="0.0.3">
+<tool id="enrich_go" name="enrich go" version="0.0.3">
     <description> over-representation and GSE analysis with Gene Ontology</description>
     <requirements>
-        <requirement type="package" version="3.3.1">r</requirement>
         <requirement type="package" version="3.0.5">bioconductor-clusterProfiler</requirement>
-        <requirement type="package" version="1.10">r-BBmisc</requirement>
+        <requirement type="package" version="1.9">r-BBmisc</requirement>
         <requirement type="package" version="3.3.0">bioconductor-GO.db</requirement>
         <requirement type="package" version="3.3.0">bioconductor-org.Hs.eg.db</requirement>
     </requirements>
@@ -34,12 +33,12 @@
         </test>
     </tests>
     <help><![CDATA[
-        This wrapper performs over-representation and gene set enrichment analysis from a list of genes using the bioconductor R package clusterProfiler and Gene Ontology.
+        This wrapper performs over-representation and gene set enrichment analysis from a list of genes using the Bioconductor package clusterProfiler and Gene Ontology.
     ]]></help>
     <citations>
         <citation type="doi">10.1089/omi.2011.0118</citation>
         <citation type="doi">10.1038/75556</citation>
-        <citation type="doi">https://doi.org/10.1093/nar/gku1179</citation>
+        <citation type="doi">10.1093/nar/gku1179</citation>
         
     </citations>
 </tool>

From 35359321b29733148336e3f97c19ce01b08090bb Mon Sep 17 00:00:00 2001
From: wjurkowski <wiktor.jurkowski@gmail.com>
Date: Mon, 10 Jul 2017 21:41:52 +0100
Subject: [PATCH 24/27] fixes to R script wrapper

---
 tools/MultiPEN/Rscript-STRINGdb-network.xml | 4 ++--
 tools/MultiPEN/Rscript-enrichment-KEGG.xml  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/MultiPEN/Rscript-STRINGdb-network.xml b/tools/MultiPEN/Rscript-STRINGdb-network.xml
index d73584c5..06a32c0d 100644
--- a/tools/MultiPEN/Rscript-STRINGdb-network.xml
+++ b/tools/MultiPEN/Rscript-STRINGdb-network.xml
@@ -1,4 +1,4 @@
-<tool id="stringdb_network" name="stringdb-network" version="0.0.3">
+<tool id="stringdb-network" name="stringdb-network" version="0.0.3">
     <description> Compile network from list of genes</description>
     <requirements>
         <requirement type="package" version="3.3.1">r</requirement>
@@ -25,7 +25,7 @@
         </test>
     </tests>
     <help><![CDATA[
-        This wrapper compiles a PPI network from a list of genes, using the bioconductor R package STRINGdb: Search Tool for the Retrieval of Interacting Proteins database (see http://bioconductor.org/packages/release/bioc/html/STRINGdb.html).
+        This wrapper compiles a PPI network from a list of genes, using the Bioconductor package STRINGdb: Search Tool for the Retrieval of Interacting Proteins database (see http://bioconductor.org/packages/release/bioc/html/STRINGdb.html).
     ]]></help>
     <citations>
         <citation type="doi">10.1093/nar/gks1094</citation>
diff --git a/tools/MultiPEN/Rscript-enrichment-KEGG.xml b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
index 626e6005..190da58b 100644
--- a/tools/MultiPEN/Rscript-enrichment-KEGG.xml
+++ b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
@@ -1,4 +1,4 @@
-<tool id="enrich kegg" name="enrich kegg" version="0.0.3">
+<tool id="enrich_kegg" name="enrich kegg" version="0.0.3">
     <description> over-representation and GSE analysis with KEGG</description>
     <requirements>
         <requirement type="package" version="3.3.1">r</requirement>

From 335bfb06fbff1a35bcf26722e1c8f9e9aedccde4 Mon Sep 17 00:00:00 2001
From: Nicola Soranzo <nicola.soranzo@earlham.ac.uk>
Date: Fri, 18 Aug 2017 15:54:33 +0100
Subject: [PATCH 25/27] Fixes for stringdb-network tool

---
 tools/MultiPEN/Rscript-STRINGdb-network.xml | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tools/MultiPEN/Rscript-STRINGdb-network.xml b/tools/MultiPEN/Rscript-STRINGdb-network.xml
index 06a32c0d..11059b7d 100644
--- a/tools/MultiPEN/Rscript-STRINGdb-network.xml
+++ b/tools/MultiPEN/Rscript-STRINGdb-network.xml
@@ -1,19 +1,18 @@
-<tool id="stringdb-network" name="stringdb-network" version="0.0.3">
-    <description> Compile network from list of genes</description>
+<tool id="stringdb-network" name="STRINGdb network" version="0.0.4">
+    <description>Protein interaction network from gene list</description>
     <requirements>
-        <requirement type="package" version="3.3.1">r</requirement>
-        <requirement type="package" version="3.0.5">bioconductor-STRINGdb</requirement>
+        <requirement type="package" version="1.16.0">bioconductor-stringdb</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" level="fatal" />
     </stdio>
     <command><![CDATA[
-        Rscript '$__tool_directory__'/compileNetworkStringDB.R ./ '$geneList' $SpeciesCode $threshold '$networkFileName'
+Rscript '$__tool_directory__'/compileNetworkStringDB.R '$geneList' $SpeciesCode $threshold '$networkFileName'
     ]]></command>
     <inputs>
-        <param name="geneList" type="data" format="tabular" label="list of genes" help="List of genes (use symbol gene names) to compile a network" />
-        <param name="SpeciesCode" type="integer" value="" label="NCBI taxonomy identifiers for your organism (i.e., 9606 for Human, 10090 for mouse)" help="If you don’t know your organism's identifier, you can search the NCBI Taxonomy from http://www.nlm.nih.gov/taxonomy"/>
-        <param name="threshold" type="float" value="0.60" label="threshold for the combined score for the interactions" help="Default value set to 0.60" />
+        <param name="geneList" type="data" format="tabular" label="Gene symbol list" help="List of gene symbols to compile a network" />
+        <param name="SpeciesCode" type="integer" value="" label="NCBI taxonomy identifier for your organism (e.g. 9606 for human, 10090 for mouse)" help="If you don't know your organism identifier, you can search the NCBI Taxonomy from https://www.ncbi.nlm.nih.gov/taxonomy" />
+        <param name="threshold" type="float" value="0.60" label="Threshold for the combined score for the interactions" />
     </inputs>
     <outputs>
         <data name="networkFileName" format="txt" label="${tool.name}: use list of genes to compile PPI network using StringDB" />
@@ -21,11 +20,12 @@
     <tests>
         <test>
             <param name="geneList" ftype="tabular" value="geneList.txt" />
+            <param name="SpeciesCode" value="9606" />
             <output name="networkFileName" file="networkFromGeneList.txt" ftype="txt" />
         </test>
     </tests>
     <help><![CDATA[
-        This wrapper compiles a PPI network from a list of genes, using the Bioconductor package STRINGdb: Search Tool for the Retrieval of Interacting Proteins database (see http://bioconductor.org/packages/release/bioc/html/STRINGdb.html).
+This wrapper compiles a protein-protein interaction (PPI) network from a list of genes, using the Bioconductor package `STRINGdb <https://bioconductor.org/packages/release/bioc/html/STRINGdb.html>`_ (Search Tool for the Retrieval of Interacting Proteins database).
     ]]></help>
     <citations>
         <citation type="doi">10.1093/nar/gks1094</citation>

From 44385c374b743de2a8466483e817f2dad522651a Mon Sep 17 00:00:00 2001
From: Nicola Soranzo <nicola.soranzo@earlham.ac.uk>
Date: Fri, 18 Aug 2017 16:37:16 +0100
Subject: [PATCH 26/27] Rename tool XML file same as tool id

---
 .../{Rscript-STRINGdb-network.xml => stringdb-network.xml}        | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tools/MultiPEN/{Rscript-STRINGdb-network.xml => stringdb-network.xml} (100%)

diff --git a/tools/MultiPEN/Rscript-STRINGdb-network.xml b/tools/MultiPEN/stringdb-network.xml
similarity index 100%
rename from tools/MultiPEN/Rscript-STRINGdb-network.xml
rename to tools/MultiPEN/stringdb-network.xml

From 2ab30b49002635dffd181ece1f0e6207288ebb1f Mon Sep 17 00:00:00 2001
From: Nicola Soranzo <nicola.soranzo@earlham.ac.uk>
Date: Fri, 18 Aug 2017 16:39:02 +0100
Subject: [PATCH 27/27] Fix file permissions

---
 tools/MultiPEN/Rscript-enrichment-GO.xml | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 tools/MultiPEN/Rscript-enrichment-GO.xml

diff --git a/tools/MultiPEN/Rscript-enrichment-GO.xml b/tools/MultiPEN/Rscript-enrichment-GO.xml
old mode 100755
new mode 100644