diff --git a/tools/MultiPEN/MultiPEN-StringDBNetwork.xml b/tools/MultiPEN/MultiPEN-StringDBNetwork.xml
deleted file mode 100644
index 6d74a525..00000000
--- a/tools/MultiPEN/MultiPEN-StringDBNetwork.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-
- Compiles network from list of genes
-
- MultiPEN
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 10.1093/nar/gks1094
-
-
diff --git a/tools/MultiPEN/MultiPEN-feature-selection.xml b/tools/MultiPEN/MultiPEN-feature-selection.xml
index cc6f4d8a..e68615a0 100644
--- a/tools/MultiPEN/MultiPEN-feature-selection.xml
+++ b/tools/MultiPEN/MultiPEN-feature-selection.xml
@@ -7,24 +7,20 @@
-
-
+
diff --git a/tools/MultiPEN/MultiPEN-enrichment-GO.xml b/tools/MultiPEN/Rscript-enrichment-GO.xml
similarity index 61%
rename from tools/MultiPEN/MultiPEN-enrichment-GO.xml
rename to tools/MultiPEN/Rscript-enrichment-GO.xml
index 4df51ae6..a3c0f12c 100644
--- a/tools/MultiPEN/MultiPEN-enrichment-GO.xml
+++ b/tools/MultiPEN/Rscript-enrichment-GO.xml
@@ -1,16 +1,19 @@
-
- (enrichment with Gene Ontology)
+
+ over-representation and GSE analysis with Gene Ontology
- MultiPEN
+ bioconductor-clusterProfiler
+ r-BBmisc
+ bioconductor-GO.db
+ bioconductor-org.Hs.eg.db
-
+
@@ -30,9 +33,12 @@ run_MultiPEN_slurm.sh EnrichmentGO ./ '$rankings'
10.1089/omi.2011.0118
+ 10.1038/75556
+ 10.1093/nar/gku1179
+
diff --git a/tools/MultiPEN/MultiPEN-enrichment-KEGG.xml b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
similarity index 51%
rename from tools/MultiPEN/MultiPEN-enrichment-KEGG.xml
rename to tools/MultiPEN/Rscript-enrichment-KEGG.xml
index e06b58a8..190da58b 100644
--- a/tools/MultiPEN/MultiPEN-enrichment-KEGG.xml
+++ b/tools/MultiPEN/Rscript-enrichment-KEGG.xml
@@ -1,16 +1,19 @@
-
- (enrichment with KEGG)
+
+ over-representation and GSE analysis with KEGG
- MultiPEN
+ r
+ bioconductor-clusterProfiler
+ r-BBmisc
+ bioconductor-org.Hs.eg.db
-
+
@@ -26,9 +29,12 @@ run_MultiPEN_slurm.sh EnrichmentKEGG ./ '$rankings'
10.1089/omi.2011.0118
+ 10.1093/nar/gkw1092
+ 10.1093/nar/gkv1070
+ 10.1093/nar/28.1.27
diff --git a/tools/MultiPEN/compileNetworkStringDB.R b/tools/MultiPEN/compileNetworkStringDB.R
new file mode 100644
index 00000000..6b5545bf
--- /dev/null
+++ b/tools/MultiPEN/compileNetworkStringDB.R
@@ -0,0 +1,89 @@
+# Script to compile a Protein-Protein Interaction network using STRINGdb:
+# "STRINGdb (Search Tool for the Retrieval of Interacting proteins database)"
+# al. FAe (2013). “STRING v9.1: protein-protein interaction networks, with increased coverage and integration.” Nucleic Acids Research (Database issue), 41.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+#
+#Inputs:
+# fileName - table with column 'name'
+# speciesCode = 9606 #homo sapiens
+# threshold = minimum combined score
+# networkFileName = "SI_network.human.NormalisedExpressionLevels.csv"
+
+# It requieres R Packages:
+# STRINGdb, http://bioconductor.org/packages/release/bioc/html/STRINGdb.html
+#
+# To run script from a terminal use the command:
+# Rscript copileNetworkStringDB.R 'path-to-directory/fileName.txt' speciesCode threshold 'path-to-output-folder/networkFileName.txt'
+
+
+
+# Input arguments
+args = commandArgs(trailingOnly=TRUE)
+
+# User must provide all four input parameters
+if (length(args)!=4) {
+ stop("Please specify file name, species code, threshold and the name of the network", call.=FALSE)
+}
+
+fileName <- args[1]
+speciesCode <- as.numeric(args[2]);
+threshold <- as.numeric(args[3]);
+networkFileName <- args[4];
+
+
+# Read data, which needs to have at least the following two columns: [gene_id, shortName]
+inputData <- read.delim( fileName, header = TRUE, sep = '\t', stringsAsFactors = FALSE)
+
+
+#### begin compiling network ####
+library(STRINGdb)
+string_db <- STRINGdb$new( version="10", species = speciesCode, score_threshold=threshold, input_directory="" )
+mapped <- string_db$map( inputData, "name", removeUnmappedRows = TRUE )
+
+#get interactions
+inter<-string_db$get_interactions(mapped$STRING_id)
+
+#annotate source and target nodes
+s <- paste(speciesCode, '.', sep = "")
+from <- gsub(s, "", inter$from)
+to <- gsub(s,"",inter$to)
+#normalise combined_score values: divide by 1000
+network <- data.frame(from = from, to = to, score = inter$combined_score/1000)
+subNetwork <- network[network$score > threshold,]
+
+#edit STRING_id (speciesCode.ENSPxxxxx) to remove speciesCode
+mapped$StringID <- gsub(s, "", mapped$STRING_id)
+mapped$STRING_id <- NULL
+
+
+
+#### network with gene names ####
+nn <- dim(subNetwork)[1]
+interactions <- matrix(data=NA,nrow=dim(subNetwork)[1], ncol=3)
+for(ii in 1:nn){
+ interactions[ii,1] = mapped$name[mapped$StringID==subNetwork$from[ii]]
+ interactions[ii,2] = mapped$name[mapped$StringID==subNetwork$to[ii]]
+ interactions[ii,3] = subNetwork$score[ii]
+}
+
+edges <- data.frame(source = interactions[,1], target = interactions[,2], score = interactions[,3])
+
+#write two files to run with GenePEN
+cat(sprintf('\nSaving network (edges) to file: %s', networkFileName))
+cat('. . .')
+#fileName <- paste(networkFileName, '.txt', sep = "")
+write.table(edges, networkFileName, sep = '\t', col.names = T, row.names = FALSE, quote = FALSE)
+cat(sprintf('Done!'))
+
diff --git a/tools/MultiPEN/enrichmentGO.R b/tools/MultiPEN/enrichmentGO.R
new file mode 100755
index 00000000..69a4a55e
--- /dev/null
+++ b/tools/MultiPEN/enrichmentGO.R
@@ -0,0 +1,177 @@
+# Script to run over-representation and gene set enrichment (GSE) analysis
+# with Gene Ontology for homo sapiens - using clusterProfiler package [Yu et al, 2012]
+# [Yu et al., 2012] Yu G, Wang L, Han Y and He Q (2012), clusterProfiler: an R package
+# for comparing biological themes among gene clusters.” OMICS: A Journal of Integrative Biology, 16(5),
+# pp. 284-287. doi: 10.1089/omi.2011.0118.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+#
+# Inputs:
+# File name Tabular (txt) file with columns: (gene) name, value and ranking
+# Output directory
+# Outputs:
+# enrichment-GO.txt list of over-represented categories with GO
+# enrichment-GO_BP.pdf figure with the first over-representated biological processes terms
+# enrichment-GO_MF.pdf figure with the first over-representated molecular functions terms
+# enrichment-GO_CC.pdf figure with the first over-representated cellular components terms
+# gse-GO.txt results for gse analysis
+#
+#
+# It requieres R Packages:
+# clusterProfiler, https://bioconductor.org/packages/release/bioc/html/clusterProfiler.html
+# BBmisc
+# GO.db
+# org.Hs.eg.db
+#
+# To run from a terminal use following command:
+# Rscript enrichmentGO.R '/path-to-file/file-name.txt' 'output-folder/'
+
+
+# Input arguments
+args = commandArgs(trailingOnly=TRUE)
+
+# User must provide at least an input file, and optionally the output directory
+if (length(args)==0) {
+ stop("At least one argument must be supplied (input file).n", call.=FALSE)
+} else if (length(args)==1) {
+ # if no output file is provided, use current directory
+ outputDir = "./"
+} else if (length(args)==2) {
+ outputDir <- args[2]
+}
+
+dataFile <- args[1]
+cat(sprintf("Loading data from file: %s\n", dataFile))
+
+
+if(!dir.exists(outputDir)){
+ cat(sprintf("Creating output directory: %s\n", outputDir))
+ dir.create(outputDir)
+}
+
+# Load file with data for analysis
+data <- read.table(dataFile, header=TRUE, sep = "\t", stringsAsFactors=FALSE)
+cat(sprintf("Number of genes: %i\n",nrow(data)))
+
+
+library(clusterProfiler)
+library(BBmisc)
+library(GO.db)
+
+D <- sortByCol(data, 'ranking')
+D <- D[,c(1,2,3)] # Only interested in the first three columns [name, value, ranking]
+entrez<-bitr(D$name, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db", drop = FALSE)
+ranked<-merge(D,entrez,by.x='name',by.y='SYMBOL')
+ranked <- sortByCol(ranked, 'ranking')
+geneList <- ranked$value
+names(geneList) <- ranked$ENTREZID
+
+
+#### Over-representation Analysis ####
+cat(sprintf("Performing over-representation analysis (enrichGO) ... "))
+cat(sprintf("Results saved to folder: %s\n", outputDir))
+
+#Enrichment for subontology BP (Biological Process)
+subclassOnt <- "BP"
+enrichment_BP <- enrichGO(ranked$ENTREZID, OrgDb="org.Hs.eg.db", ont=subclassOnt, readable=TRUE)
+enrichmentSummary_BP <- as.data.frame(enrichment_BP)
+head(enrichmentSummary_BP)
+if(nrow(enrichmentSummary_BP)>0){
+ aux <- cbind(enrichmentSummary_BP, "BP")
+ colnames(aux)[10]<- 'subontology'
+ enrichmentSummary_BP <- aux
+}
+#add to results: enrichment for BP category
+results <- enrichmentSummary_BP
+
+#Enrichment for subontology MF (Molecular Function)
+subclassOnt <- "MF"
+enrichment_MF <- enrichGO(ranked$ENTREZID, OrgDb="org.Hs.eg.db", ont=subclassOnt, readable=TRUE)
+enrichmentSummary_MF <- as.data.frame(enrichment_MF)
+head(enrichmentSummary_MF)
+if(nrow(enrichmentSummary_MF)>0){
+ aux <- cbind(enrichmentSummary_MF, "MF")
+ colnames(aux)[10]<- 'subontology'
+ enrichmentSummary_MF <- aux
+ #add to results: enrichment for MF category
+ results <- rbind(results, enrichmentSummary_MF)
+}
+
+
+#Enrichment for subclass CC (Cellular Component)
+subclassOnt <- "CC"
+enrichment_CC <- enrichGO(ranked$ENTREZID, OrgDb="org.Hs.eg.db", ont=subclassOnt, readable=TRUE)
+enrichmentSummary_CC <- as.data.frame(enrichment_CC)
+head(enrichmentSummary_CC)
+if(nrow(enrichmentSummary_CC)>0){
+ aux <- cbind(enrichmentSummary_CC, "CC")
+ colnames(aux)[10]<- 'subontology'
+ enrichmentSummary_CC <- aux
+ #add to results: enrichment for CC category
+ results <- rbind(results, enrichmentSummary_CC)
+}
+
+# modify column names for consistency with MultiPEN and for valid MATLAB identifiers
+# change: pvalue to pValue, p.adjust to pAdjust, qvalue to qValue
+aux <- colnames(results)
+aux[c(5,6,7)] <- c("pValue", "pAdjust", "qValue")
+colnames(results) <- aux
+
+#write results to file:
+fileName <- paste(outputDir, "enrichment-GO.txt", sep = "")
+cat(sprintf("writing results to file: %s\n", fileName))
+write.table(results, fileName, sep = '\t', row.names = FALSE)
+
+
+fileName <- paste(outputDir, 'enrichment-GO_BP.pdf', sep = "")
+pdf(fileName)
+barplot(enrichment_BP, showCategory=20)
+dev.off()
+
+fileName <- paste(outputDir, 'enrichment-GO_MF.pdf', sep = "")
+pdf(fileName)
+barplot(enrichment_MF, drop=TRUE, showCategory=20)
+dev.off()
+
+fileName <- paste(outputDir, 'enrichment-GO_CC.pdf', sep = "")
+pdf(fileName)
+barplot(enrichment_CC, showCategory=20)
+dev.off()
+
+
+
+#### Gene Set Enrichment Analysis ####
+# GSE for all ontologies: BP, MF and CC
+kk <- gseGO(geneList, ont = "ALL", OrgDb="org.Hs.eg.db", keytype = "ENTREZID")
+results <- as.data.frame(kk)
+results <- sortByCol(results, 'setSize', asc = F)
+head(results)
+
+
+
+# modify column names for consistency for valid MATLAB identifiers
+# change: pvalue to pValue, p.adjust to pAdjust, qvalues to qValues
+aux <- colnames(results)
+aux[c(7,8,9)] <- c("pValue", "pAdjust", "qValue")
+colnames(results) <- aux
+
+#write results (table) to file:
+fileName <- paste(outputDir, "gse-GO.txt", sep = "")
+cat(sprintf("writing results to file: %s\n", fileName))
+write.table(results, fileName, sep = '\t', row.names = FALSE)
+
+# save plot to file (currently not supported!):
+# fileName <- paste(outputDir, 'gse-GO.pdf', sep = "")
+# pdf(fileName)
+# barplot(kk)
diff --git a/tools/MultiPEN/enrichmentKEGG.R b/tools/MultiPEN/enrichmentKEGG.R
new file mode 100644
index 00000000..3e74aeae
--- /dev/null
+++ b/tools/MultiPEN/enrichmentKEGG.R
@@ -0,0 +1,121 @@
+# Script to run over-representation and gene set enrichment (GSE) analysis
+# with KEGG for homo sapiens - using clusterProfiler package [Yu et al, 2012]
+# [Yu et al., 2012] Yu G, Wang L, Han Y and He Q (2012), clusterProfiler: an R package
+# for comparing biological themes among gene clusters.” OMICS: A Journal of Integrative Biology, 16(5),
+# pp. 284-287. doi: 10.1089/omi.2011.0118.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+#
+# Inputs:
+# File name Tabular (txt) file with columns: (gene) name, value and ranking
+# Output directory
+# Outputs:
+# enrichment-KEGG.txt list of over-represented categories with KEGG
+# enrichment-KEGG.pdf figure with the first over-representated categories
+# gse-KEGG.txt results for gse analysis
+#
+# It requieres R Packages:
+# clusterProfiler, https://bioconductor.org/packages/release/bioc/html/clusterProfiler.html
+# BBmisc
+# org.Hs.eg.db
+#
+# To run script from a terminal use the command:
+# Rscript enrichmentKEGG.R 'path-to-directory/file-name.txt' 'path-to-output-folder/'
+
+# Input Arguments
+args = commandArgs(trailingOnly=TRUE)
+
+# User must provide at least an input file, and optionally the output directory
+if (length(args)==0) {
+ stop("At least one argument must be supplied (input file).n", call.=FALSE)
+} else if (length(args)==1) {
+ # if no output file is provided, use current directory
+ outputDir = "./"
+} else if (length(args)==2) {
+ outputDir <- args[2]
+}
+
+dataFile <- args[1]
+cat(sprintf("Loading data from file: %s\n", dataFile))
+
+if(!dir.exists(outputDir)){
+ cat(sprintf("Creating output directory: %s\n", outputDir))
+ dir.create(outputDir)
+}
+
+# Load file with data for analysis
+data <- read.table(dataFile, header=TRUE, sep = "\t", stringsAsFactors=FALSE)
+cat(sprintf("Number of genes: %i\n",nrow(data)))
+
+
+library(clusterProfiler)
+library(BBmisc)
+
+D <- sortByCol(data, 'ranking')
+D <- D[D[,2]!=0,]
+D <- D[,c(1,2,3)] # Only interested in the first three columns [name, value, ranking]
+entrez<-bitr(D$name, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db", drop = FALSE)
+ranked<-merge(D,entrez,by.x='name',by.y='SYMBOL')
+ranked <- sortByCol(ranked, 'ranking')
+geneList <- ranked$value
+names(geneList) <- ranked$ENTREZID
+
+cat(sprintf("Performing over-representation analysis (with KEGG) ... "))
+cat(sprintf("Results saved to folder: %s\n", outputDir))
+
+
+#### Enrichment with KEGG ####
+enrichment_kegg <- enrichKEGG(ranked$ENTREZID, organism = 'hsa', keyType = "kegg")
+results <- summary(enrichment_kegg)
+head(results)
+
+
+# modify column names for consistency with MultiPEN and for valid MATLAB identifiers
+# change: pvalue to pValue, p.adjust to pAdjust, qvalue to qValue
+aux <- colnames(results)
+aux[c(5,6,7)] <- c("pValue", "pAdjust", "qValue")
+colnames(results) <- aux
+
+#write results (table) to file:
+fileName <- paste(outputDir, "enrichment-KEGG.txt", sep = "")
+cat(sprintf("writing results to file: %s\n", fileName))
+write.table(results, fileName, sep = '\t', row.names = FALSE)
+
+# save plot to file:
+fileName <- paste(outputDir, 'enrichment-KEGG.pdf', sep = "")
+pdf(fileName)
+barplot(enrichment_kegg, showCategory=20)
+
+
+#### Gene Set Enrichment with KEGG ####
+kk <- gseKEGG(geneList, organism = 'hsa', keyType = "kegg")
+results <- summary(kk)
+results
+
+
+# modify column names for consistency for valid MATLAB identifiers
+# change: p.adjust to pAdjust
+aux <- colnames(results)
+aux[c(6,7,8)] <- c("pValue", "pAdjust", "qValue")
+colnames(results) <- aux
+
+#write results (table) to file:
+fileName <- paste(outputDir, "gse-KEGG.txt", sep = "")
+cat(sprintf("writing results to file: %s\n", fileName))
+write.table(results, fileName, sep = '\t', row.names = FALSE)
+
+# save plot to file (currently not supported!):
+# fileName <- paste(outputDir, 'gse-KEGG.pdf', sep = "")
+# pdf(fileName)
+# barplot(kk)
\ No newline at end of file
diff --git a/tools/MultiPEN/stringdb-network.xml b/tools/MultiPEN/stringdb-network.xml
new file mode 100644
index 00000000..11059b7d
--- /dev/null
+++ b/tools/MultiPEN/stringdb-network.xml
@@ -0,0 +1,33 @@
+
+ Protein interaction network from gene list
+
+ bioconductor-stringdb
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `_ (Search Tool for the Retrieval of Interacting Proteins database).
+ ]]>
+
+ 10.1093/nar/gks1094
+
+
diff --git a/tools/MultiPEN/test-data/MultiPEN-Rankings_lambda0.0001-onlyGenes.txt b/tools/MultiPEN/test-data/MultiPEN-Rankings_lambda0.0001-onlyGenes.txt
index 35c14bbb..1a361b65 100644
--- a/tools/MultiPEN/test-data/MultiPEN-Rankings_lambda0.0001-onlyGenes.txt
+++ b/tools/MultiPEN/test-data/MultiPEN-Rankings_lambda0.0001-onlyGenes.txt
@@ -1,4 +1,4 @@
-name weight ranking foldChange higherIn case1 case2 case3 control1 control2 control3
+name value ranking foldChange higherIn case1 case2 case3 control1 control2 control3
SF3B1 0.99906704216243 1 -0.236470472 control 0.283198634 0.27896994 0.405474972 0.443509247 0.650759237 0.173061082
PDIA2 0.998078594941955 2 -0.247818562 control 0.123381382 0.291498034 0.57227493 0.264566354 0.514119992 0.533702257
TGFBR2 0.994602559989464 3 0.221089433 case 0.986051067 0.724247034 0.477134352 0.888992345 0.867667063 0.03471835