diff --git a/.travis/RUMP-test_aftermzmine.sh b/.travis/RUMP-test_aftermzmine.sh
index 35395d3..910262f 100644
--- a/.travis/RUMP-test_aftermzmine.sh
+++ b/.travis/RUMP-test_aftermzmine.sh
@@ -1,2 +1,2 @@
-# Test processes after MZmine with sample data
-./nextflow run_aftermzmine.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 --pos_mzmine_peak_output .travis/pos_data.csv --neg_mzmine_peak_output .travis/neg_data.csv -with-docker galaxydream/metabolomics_pipeline
+# Test processing of MZmine output files with sample data
+./nextflow run_aftermzmine.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 --pos_mzmine_peak_output .travis/pos_data.csv --neg_mzmine_peak_output .travis/neg_data.csv -with-docker xinsongdu/lemaslab_rump:v1.0.0
diff --git a/.travis/RUMP-test_all.sh b/.travis/RUMP-test_all.sh
index 1c5eed0..b6dc535 100644
--- a/.travis/RUMP-test_all.sh
+++ b/.travis/RUMP-test_all.sh
@@ -4,5 +4,5 @@
 wget https://github.com/mzmine/mzmine2/releases/download/v2.53/MZmine-2.53-Linux.zip && unzip MZmine-2.53-Linux.zip && rm MZmine-2.53-Linux.zip
 
 # Test all processes with sample data
-./nextflow main.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 -with-docker galaxydream/metabolomics_pipeline
+./nextflow main.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 -with-docker xinsongdu/lemaslab_rump:v1.0.0
diff --git a/Dockerfile b/Dockerfile
index ea4fa77..ad0a387 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
-# Dockerfile for UMPIRE
+# Dockerfile for RUMP
 
-FROM rocker/r-ver:3.5.2
+FROM rocker/rstudio:3.6.3
 
 MAINTAINER xinsongdu@ufl.edu
@@ -58,7 +58,11 @@ WORKDIR /app
 COPY accessibility.properties /app
 
 # Fix a bug for java
-RUN mv accessibility.properties /etc/java-8-openjdk/
+# RUN mv accessibility.properties /etc/java-8-openjdk/
+
+# Install R packages
+COPY r_package_install.R /app
+RUN Rscript r_package_install.R
 
 # Install mummichog
 RUN pip install --upgrade setuptools
diff --git a/README.md b/README.md
index d375821..5044168 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ wget https://github.com/mzmine/mzmine2/releases/download/v2.53/MZmine-2.53-Linux
 ```
 4. Pull singularity image if using high-performance computing (**if using local machine, skip this step**)
 ```
-mkdir -p work/singularity && singularity pull --name work/singularity/xinsongdu-lemaslab_reump.img docker://xinsongdu/lemaslab_rump:v0.0.0
+mkdir -p work/singularity && singularity pull --name work/singularity/xinsongdu-lemaslab_reump.img docker://xinsongdu/lemaslab_rump:v1.0.0
 ```
 
 # General Behavior
@@ -78,11 +78,11 @@ Negative mode:
 - Create design files for positve data and negative data, indicating the group of each file, save them to `data/pos_design.csv` and `data/neg_design.csv`.
 Sample design file can be found in `data/sample_data/pos_design.csv` and `data/sample_data/neg_design.csv`
 - Process your data with default parameters using local machine
 ```
-nextflow main.nf -with-docker xinsongdu/lemaslab_rump:v0.0.0
+nextflow main.nf -with-docker xinsongdu/lemaslab_rump:v1.0.0
 ```
 - Process your data with default parameters using high-performance computing (It is recommended to maximize CPU and memory in pos_peakDetection_mzmine and neg_peakDetection_mzmine processes in `nextflow.config` if using high-performance computing)
 ```
-nextflow main.nf --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v0.0.0
+nextflow main.nf --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v1.0.0
 ```
 ### Process dataframe generatd by MZmine-2.53
@@ -91,7 +91,7 @@ nextflow main.nf --container singularity -with-singularity docker://xinsongdu/le
 - Create design files describing the group of each column of positive/negative data, save them to `data/pos_design.csv` and `data/neg_design.csv`
 - Get statistical analysis and pathway analysis
 ```
-nextflow run_aftermzmine.nf -with-docker xinsongdu/lemaslab_rump:v0.0.0
+nextflow run_aftermzmine.nf -with-docker xinsongdu/lemaslab_rump:v1.0.0
 ```
 
 ### Help message
@@ -113,7 +113,7 @@ Check https://github.com/lemaslab/RUMP for updates, and refer to
 https://github.com/lemaslab/RUMP/wiki
 
     Usage:
-    nextflow run_all.nf [options] -with-docker xinsongdu/lemaslab_rump:v0.0.0
+    nextflow run_all.nf [options] -with-docker xinsongdu/lemaslab_rump:v1.0.0
 
     Arguments (it is mandatory to change `input_file` and `mzmine_dir` before running:
     ----------------------------- common parameters ----------------------------------
@@ -128,7 +128,7 @@ Please refer to nextflow.config for more options.
 
     Container:
       Docker image to use with -with-docker|-with-singularity options is
-      'docker://xinsongdu/lemaslab_rump:v0.0.0'
+      'docker://xinsongdu/lemaslab_rump:v1.0.0'
 
     RUMP supports .mzXML format files.
 ```
@@ -163,13 +163,13 @@ RUMP returns the following exit status values:
 
 ### Running tests on local machine
 ```
-nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv -with-docker xinsongdu/lemaslab_rump:v0.0.0
+nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv -with-docker xinsongdu/lemaslab_rump:v1.0.0
 ```
 
 ### Running tests on high-performance computing
 ```
-nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v0.0.0
+nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v1.0.0
 ```
 
 # Bug reporting and feature requests
diff --git a/main.nf b/main.nf
index 871e7c5..dc67ac3 100644
--- a/main.nf
+++ b/main.nf
@@ -83,6 +83,10 @@ MQC_CONFIG = Channel.fromPath(params.mqc_config)
 PYTHON_MUMMICHOG_INPUT_PREPARE = Channel.fromPath(params.python_mummichog_input_prepare)
 PYTHON_MUMMICHOG_INPUT_PREPARE.into{PYTHON_MUMMICHOG_INPUT_PREPARE_NOBG; PYTHON_MUMMICHOG_INPUT_PREPARE_WITHBG}
 
+// R code for unknown search
+R_UNKNOWN_SEARCH = Channel.fromPath(params.r_unknown_search)
+R_UNKNOWN_SEARCH.into{R_UNKNOWN_SEARCH_NOBG; R_UNKNOWN_SEARCH_WITHBG}
+
 // Result files used by MultiQC to generate report.
 // MQC_DIR = Channel.fromPath(params.mqc_dir, type: 'dir')
@@ -143,7 +147,7 @@ if (params.help) {
     exit 1
 }
 
-// Unit tests
+// Check appropriateness of input
 process input_check {
 
     echo true
@@ -278,8 +282,8 @@ process add_stats {
     """
 }
 
-POS_DATA_NOBG.into{POS_NOBG_FOR_BS; POS_NOBG_FOR_MQC; POS_NOBG_FOR_PCA; POS_NOBG_FOR_HCLUSTERING; POS_NOBG_FOR_VD; POS_NOBG_FOR_BARPLOT; POS_NOBG_FOR_MUMMICHOG}
-NEG_DATA_NOBG.into{NEG_NOBG_FOR_BS; NEG_NOBG_FOR_MQC; NEG_NOBG_FOR_PCA; NEG_NOBG_FOR_HCLUSTERING; NEG_NOBG_FOR_VD; NEG_NOBG_FOR_BARPLOT; NEG_NOBG_FOR_MUMMICHOG}
+POS_DATA_NOBG.into{POS_NOBG_FOR_BS; POS_NOBG_FOR_MQC; POS_NOBG_FOR_PCA; POS_NOBG_FOR_HCLUSTERING; POS_NOBG_FOR_VD; POS_NOBG_FOR_BARPLOT; POS_NOBG_FOR_MUMMICHOG; POS_NOBG_FOR_UNKNOWN_SEARCH}
+NEG_DATA_NOBG.into{NEG_NOBG_FOR_BS; NEG_NOBG_FOR_MQC; NEG_NOBG_FOR_PCA; NEG_NOBG_FOR_HCLUSTERING; NEG_NOBG_FOR_VD; NEG_NOBG_FOR_BARPLOT; NEG_NOBG_FOR_MUMMICHOG; NEG_NOBG_FOR_UNKNOWN_SEARCH}
 
 // Background subtraction
 process blank_subtraction {
@@ -311,8 +315,8 @@ process blank_subtraction {
 
 // split channel content for multiple-time use
-POS_DATA_WITHBG.into{POS_WITHBG_FOR_MQC; POS_WITHBG_FOR_PCA; POS_WITHBG_FOR_HCLUSTERING; POS_WITHBG_FOR_VD; POS_WITHBG_FOR_BARPLOT; POS_WITHBG_FOR_MUMMICHOG}
-NEG_DATA_WITHBG.into{NEG_WITHBG_FOR_MQC; NEG_WITHBG_FOR_PCA; NEG_WITHBG_FOR_HCLUSTERING; NEG_WITHBG_FOR_VD; NEG_WITHBG_FOR_BARPLOT; NEG_WITHBG_FOR_MUMMICHOG}
+POS_DATA_WITHBG.into{POS_WITHBG_FOR_MQC; POS_WITHBG_FOR_PCA; POS_WITHBG_FOR_HCLUSTERING; POS_WITHBG_FOR_VD; POS_WITHBG_FOR_BARPLOT; POS_WITHBG_FOR_MUMMICHOG; POS_WITHBG_FOR_UNKNOWN_SEARCH}
+NEG_DATA_WITHBG.into{NEG_WITHBG_FOR_MQC; NEG_WITHBG_FOR_PCA; NEG_WITHBG_FOR_HCLUSTERING; NEG_WITHBG_FOR_VD; NEG_WITHBG_FOR_BARPLOT; NEG_WITHBG_FOR_MUMMICHOG; NEG_WITHBG_FOR_UNKNOWN_SEARCH}
 
 // Process for generating files that can be parsed by MultiQC regarding peak numbers of different steps.
 process mqc_peak_number_comparison {
@@ -568,6 +572,55 @@ process bar_plot_withbg {
 
 }
 
+// unknown search for metabolites identified before blank subtraction
+process unknown_search_nobg {
+
+    publishDir './results/peak_table/', mode: 'copy'
+
+    input:
+    file data_pos from POS_NOBG_FOR_UNKNOWN_SEARCH
+    file data_neg from NEG_NOBG_FOR_UNKNOWN_SEARCH
+    file r_unknown_search from R_UNKNOWN_SEARCH_NOBG
+
+    output:
+    file params.unknown_search_pos_nobg into UNKNOWN_SEARCH_POS_NOBG
+    file params.unknown_search_neg_nobg into UNKNOWN_SEARCH_NEG_NOBG
+
+    shell:
+    """
+    Rscript ${r_unknown_search} -i ${data_pos} -n positive -c ${params.mz_col_pos_nobg} -o ${params.unknown_search_pos_nobg} &&
+    Rscript ${r_unknown_search} -i ${data_neg} -n negative -c ${params.mz_col_neg_nobg} -o ${params.unknown_search_neg_nobg}
+
+    """
+
+}
+
+// unknown search for metabolites identified after blank subtraction
+process unknown_search_withbg {
+
+    publishDir './results/peak_table/', mode: 'copy'
+
+    input:
+    file data_pos from POS_WITHBG_FOR_UNKNOWN_SEARCH
+    file data_neg from NEG_WITHBG_FOR_UNKNOWN_SEARCH
+    file r_unknown_search from R_UNKNOWN_SEARCH_WITHBG
+
+    output:
+    file params.unknown_search_pos_withbg into UNKNOWN_SEARCH_POS_WITHBG
+    file params.unknown_search_neg_withbg into UNKNOWN_SEARCH_NEG_WITHBG
+
+    when:
+    params.bs == "1"
+
+    shell:
+    """
+    Rscript ${r_unknown_search} -i ${data_pos} -n positive -c ${params.mz_col_pos_withbg} -o ${params.unknown_search_pos_withbg} &&
+    Rscript ${r_unknown_search} -i ${data_neg} -n negative -c ${params.mz_col_neg_withbg} -o ${params.unknown_search_neg_withbg}
+
+    """
+
+}
+
 process mqc_figs {
 
     publishDir './results/mqc/', mode: 'copy'
diff --git a/nextflow.config b/nextflow.config
index 7981297..785a044 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -106,8 +106,13 @@ params
     python_barplot = "./rump/bar_plot.py"
     data_info = "./rump/data_info.py"
     peak_number_comparison_path = "./rump/peak_number_comparison.py"
-    python_bs = "./rump/blank_subtraction.py"
+    r_unknown_search = "./rump/unknown_search.R"
+
+    mz_col_pos_nobg = "row.m.z"
+    mz_col_neg_nobg = "row.m.z"
+    mz_col_pos_withbg = "row.m.z"
+    mz_col_neg_withbg = "row.m.z"
 
     mqc_dir = "./results/mqc/"
     experiments_info = "./rump/software_descriptions_mqc.txt"
@@ -193,6 +198,14 @@ params
     barplot_neg_withbg = "neg_barplot_group1_withbg.png"
     barplot_neg_withbg_om = "neg_onlymatched_barplot_group1_withbg.png"
 
+    // outputs for unknown_search_nobg
+    unknown_search_pos_nobg = "unknown_search_pos_nobg.csv"
+    unknown_search_neg_nobg = "unknown_search_neg_nobg.csv"
+
+    // outputs for unknown_search_withbg
+    unknown_search_pos_withbg = "unknown_search_pos_withbg.csv"
+    unknown_search_neg_withbg = "unknown_search_neg_withbg.csv"
+
     // regarding mummichog
     python_mummichog_input_prepare = "./rump/mummichog_input_prepare.py"
     data_pos_nobg_group1_mummichog = "data_pos_nobg_group1_mummichog.txt"
@@ -288,6 +301,18 @@ process
         cpus = 1
         memory = '4 GB'
     }
+    withName: unknown_search_nobg
+    {
+        time = '15m'
+        cpus = 1
+        memory = '4 GB'
+    }
+    withName: unknown_search_withbg
+    {
+        time = '15m'
+        cpus = 1
+        memory = '4 GB'
+    }
     withName: mqc_figs
     {
         time = '15m'
diff --git a/r_package_install.R b/r_package_install.R
new file mode 100755
index 0000000..262f7b3
--- /dev/null
+++ b/r_package_install.R
@@ -0,0 +1,4 @@
+# install necessary packages
+list.of.packages <- c("cmmr", "optparse")
+new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
+if(length(new.packages)) install.packages(new.packages)
\ No newline at end of file
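A side note on the new `r_package_install.R` above: `RUN Rscript r_package_install.R` calls `install.packages()` non-interactively during the image build. The rocker base image ships with a CRAN repository preconfigured, so the committed script should work there as-is; if the base image ever changes, pinning a repository explicitly avoids the interactive mirror prompt. A minimal sketch of that variant (the mirror URL is an assumption, not part of this PR):

```
# Sketch of r_package_install.R with an explicit CRAN mirror (assumed URL),
# so install.packages() never prompts for a mirror during `docker build`.
options(repos = c(CRAN = "https://cloud.r-project.org"))
list.of.packages <- c("cmmr", "optparse")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages)) install.packages(new.packages)
```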
diff --git a/rump/unknown_search.R b/rump/unknown_search.R
new file mode 100755
index 0000000..1fee842
--- /dev/null
+++ b/rump/unknown_search.R
@@ -0,0 +1,59 @@
+# 2018.12.19. ask
+# rm(list=ls(all=TRUE))
+
+# 20 Digits Precision Representation
+options(scipen=20)
+
+# Setting the correct working directory.
+# NOTE!!! -> Can be linked differently on different computers.
+# setwd("/Users/xinsongdu/mnt/projects/beach01/secimtools")
+
+library(optparse) # add this library to enable argparse arguments
+library(cmmr)
+options(warn=-1)
+
+## Define input and output arguments
+option_list = list(
+  make_option(c("-i", "--input"), type="character", default="bovine_enriched_unknown.csv",
+              help="input data file"),
+  make_option(c("-c", "--mz_col"), type="character", default="row.m.z",
+              help="column name indicating m/z values"),
+  make_option(c("-n", "--ion"), type="character", default="positive",
+              help="ion mode"),
+  make_option(c("-o", "--output"), type="character", default="searched_unknown_pos_after_blank_subtraction.csv",
+              help="output csv file name")
+);
+
+opt_parser = OptionParser(option_list=option_list);
+opt = parse_args(opt_parser);
+
+# read data
+data <- read.csv(file=opt$input)
+
+# extract mz values from the user-specified m/z column
+mzs = as.vector(data[[opt$mz_col]])
+# mzs = lapply(mzs,round,4)
+
+if (opt$ion=="negative"){
+  adduct <- '["M-H"]'
+} else {
+  adduct <- '["M+H"]'
+}
+
+# batch search
+batch_df <- batch_search('http://ceumass.eps.uspceu.es/mediator/api/v3/batch',
+                         'all-except-peptides',
+                         '["all-except-mine"]',
+                         'mz',
+                         opt$ion,
+                         adduct,
+                         5,
+                         'ppm',
+                         mzs)
+if (typeof(batch_df)=="character"){
+  data_merge <- data.frame(Empty=character())
+} else {
+  data_merge <- merge(data, batch_df, by.x=opt$mz_col, by.y='experimental_mass')
+}
+
+write.csv(data_merge, opt$output, row.names=TRUE)
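For context on the new `rump/unknown_search.R`: it reads the peak table, takes the m/z values from the column named by `-c`/`--mz_col`, sends them to the CEU Mass Mediator batch-search API through the `cmmr` package, and merges any hits back onto the table; when the server returns nothing usable it writes an essentially empty table instead of failing. A minimal interactive sketch of the same call, mirroring the `batch_search()` arguments above (the two m/z values are made up for illustration):

```
# Illustrative only: query CEU Mass Mediator for two example m/z values,
# using the same arguments as rump/unknown_search.R.
library(cmmr)
mzs <- c(180.0634, 255.2330)  # made-up example masses
hits <- batch_search('http://ceumass.eps.uspceu.es/mediator/api/v3/batch',
                     'all-except-peptides',
                     '["all-except-mine"]',
                     'mz',
                     'positive',
                     '["M+H"]',
                     5,
                     'ppm',
                     mzs)
# when matches are returned, `hits` is a data frame keyed by experimental_mass
head(hits)
```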
diff --git a/run_aftermzmine.nf b/run_aftermzmine.nf
index 46aeec6..f6e63c3 100644
--- a/run_aftermzmine.nf
+++ b/run_aftermzmine.nf
@@ -80,6 +80,10 @@ NEG_DESIGN.into{NEG_DESIGN_FOR_AS; NEG_DESIGN_FOR_BS; NEG_DESIGN_FOR_PCA_NOBG; N
 // EXPERIMENTS_INFO = Channel.fromPath(params.experiments_info)
 // MQC_CONFIG = Channel.fromPath(params.mqc_config)
 
+// R code for unknown search
+R_UNKNOWN_SEARCH = Channel.fromPath(params.r_unknown_search)
+R_UNKNOWN_SEARCH.into{R_UNKNOWN_SEARCH_NOBG; R_UNKNOWN_SEARCH_WITHBG}
+
 // Result files used by MultiQC to generate report.
 // MQC_DIR = Channel.fromPath(params.mqc_dir, type: 'dir')
@@ -183,8 +187,8 @@ process add_stats {
 }
 
 // split channel content for multiple-time use
-POS_DATA_NOBG.into{POS_NOBG_FOR_BS; POS_NOBG_FOR_MQC; POS_NOBG_FOR_PCA; POS_NOBG_FOR_HCLUSTERING; POS_NOBG_FOR_VD; POS_NOBG_FOR_BARPLOT; POS_NOBG_FOR_MUMMICHOG}
-NEG_DATA_NOBG.into{NEG_NOBG_FOR_BS; NEG_NOBG_FOR_MQC; NEG_NOBG_FOR_PCA; NEG_NOBG_FOR_HCLUSTERING; NEG_NOBG_FOR_VD; NEG_NOBG_FOR_BARPLOT; NEG_NOBG_FOR_MUMMICHOG}
+POS_DATA_NOBG.into{POS_NOBG_FOR_BS; POS_NOBG_FOR_MQC; POS_NOBG_FOR_PCA; POS_NOBG_FOR_HCLUSTERING; POS_NOBG_FOR_VD; POS_NOBG_FOR_BARPLOT; POS_NOBG_FOR_MUMMICHOG; POS_NOBG_FOR_UNKNOWN_SEARCH}
+NEG_DATA_NOBG.into{NEG_NOBG_FOR_BS; NEG_NOBG_FOR_MQC; NEG_NOBG_FOR_PCA; NEG_NOBG_FOR_HCLUSTERING; NEG_NOBG_FOR_VD; NEG_NOBG_FOR_BARPLOT; NEG_NOBG_FOR_MUMMICHOG; NEG_NOBG_FOR_UNKNOWN_SEARCH}
 
 // Background subtraction
 process blank_subtraction {
@@ -211,8 +215,8 @@ process blank_subtraction {
 }
 
 // split channel content for multiple-time use
-POS_DATA_WITHBG.into{POS_WITHBG_FOR_MQC; POS_WITHBG_FOR_PCA; POS_WITHBG_FOR_HCLUSTERING; POS_WITHBG_FOR_VD; POS_WITHBG_FOR_BARPLOT; POS_WITHBG_FOR_MUMMICHOG}
-NEG_DATA_WITHBG.into{NEG_WITHBG_FOR_MQC; NEG_WITHBG_FOR_PCA; NEG_WITHBG_FOR_HCLUSTERING; NEG_WITHBG_FOR_VD; NEG_WITHBG_FOR_BARPLOT; NEG_WITHBG_FOR_MUMMICHOG}
+POS_DATA_WITHBG.into{POS_WITHBG_FOR_MQC; POS_WITHBG_FOR_PCA; POS_WITHBG_FOR_HCLUSTERING; POS_WITHBG_FOR_VD; POS_WITHBG_FOR_BARPLOT; POS_WITHBG_FOR_MUMMICHOG; POS_WITHBG_FOR_UNKNOWN_SEARCH}
+NEG_DATA_WITHBG.into{NEG_WITHBG_FOR_MQC; NEG_WITHBG_FOR_PCA; NEG_WITHBG_FOR_HCLUSTERING; NEG_WITHBG_FOR_VD; NEG_WITHBG_FOR_BARPLOT; NEG_WITHBG_FOR_MUMMICHOG; NEG_WITHBG_FOR_UNKNOWN_SEARCH}
 
 // Process for generating files that can be parsed by MultiQC regarding peak numbers of different steps.
 process mqc_peak_number_comparison {
@@ -456,6 +460,55 @@ process bar_plot_withbg {
 
 }
 
+// unknown search for metabolites identified before blank subtraction
+process unknown_search_nobg {
+
+    publishDir './results/peak_table/', mode: 'copy'
+
+    input:
+    file data_pos from POS_NOBG_FOR_UNKNOWN_SEARCH
+    file data_neg from NEG_NOBG_FOR_UNKNOWN_SEARCH
+    file r_unknown_search from R_UNKNOWN_SEARCH_NOBG
+
+    output:
+    file params.unknown_search_pos_nobg into UNKNOWN_SEARCH_POS_NOBG
+    file params.unknown_search_neg_nobg into UNKNOWN_SEARCH_NEG_NOBG
+
+    shell:
+    """
+    Rscript ${r_unknown_search} -i ${data_pos} -n positive -c ${params.mz_col_pos_nobg} -o ${params.unknown_search_pos_nobg} &&
+    Rscript ${r_unknown_search} -i ${data_neg} -n negative -c ${params.mz_col_neg_nobg} -o ${params.unknown_search_neg_nobg}
+
+    """
+
+}
+
+// unknown search for metabolites identified after blank subtraction
+process unknown_search_withbg {
+
+    publishDir './results/peak_table/', mode: 'copy'
+
+    input:
+    file data_pos from POS_WITHBG_FOR_UNKNOWN_SEARCH
+    file data_neg from NEG_WITHBG_FOR_UNKNOWN_SEARCH
+    file r_unknown_search from R_UNKNOWN_SEARCH_WITHBG
+
+    output:
+    file params.unknown_search_pos_withbg into UNKNOWN_SEARCH_POS_WITHBG
+    file params.unknown_search_neg_withbg into UNKNOWN_SEARCH_NEG_WITHBG
+
+    when:
+    params.bs == "1"
+
+    shell:
+    """
+    Rscript ${r_unknown_search} -i ${data_pos} -n positive -c ${params.mz_col_pos_withbg} -o ${params.unknown_search_pos_withbg} &&
+    Rscript ${r_unknown_search} -i ${data_neg} -n negative -c ${params.mz_col_neg_withbg} -o ${params.unknown_search_neg_withbg}
+
+    """
+
+}
+
 if (params.container != "Docker") {
     MAT_CONFIG_DIR = Channel.from('~/.config/matplotlib/')
     MAT_CONFIG_FILE = Channel.from('~/.config/matplotlib/matplotlibrc')
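Both workflows publish the new CSVs to `./results/peak_table/`, with file names taken from the `unknown_search_*` parameters added to `nextflow.config`. A quick post-run sanity check might look like this (a sketch: the path simply combines `publishDir` with the default parameter values in this PR, and the column name assumes the default `row.m.z`):

```
# Inspect one of the new unknown-search outputs after a run.
res <- read.csv("results/peak_table/unknown_search_pos_nobg.csv")
nrow(res)
# a single feature m/z can match several candidate compounds,
# so repeated m/z values after the merge are expected
sum(duplicated(res[["row.m.z"]]))
```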