From 743adf3cfa9c155259c372e9e071a0936f7978bb Mon Sep 17 00:00:00 2001 From: GalaxyDream Date: Wed, 22 Apr 2020 22:39:45 -0400 Subject: [PATCH 1/9] add unknown search --- .travis/RUMP-test_aftermzmine.sh | 2 +- .travis/RUMP-test_all.sh | 2 +- Dockerfile | 10 +++-- README.md | 16 ++++---- main.nf | 63 +++++++++++++++++++++++++++++--- nextflow.config | 10 ++++- r_package_install.R | 4 ++ rump/unknown_search.R | 57 +++++++++++++++++++++++++++++ run_aftermzmine.nf | 61 +++++++++++++++++++++++++++++-- 9 files changed, 202 insertions(+), 23 deletions(-) create mode 100755 r_package_install.R create mode 100755 rump/unknown_search.R diff --git a/.travis/RUMP-test_aftermzmine.sh b/.travis/RUMP-test_aftermzmine.sh index 35395d3..ff99e65 100644 --- a/.travis/RUMP-test_aftermzmine.sh +++ b/.travis/RUMP-test_aftermzmine.sh @@ -1,2 +1,2 @@ # Test processes after MZmine with sample data -./nextflow run_aftermzmine.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 --pos_mzmine_peak_output .travis/pos_data.csv --neg_mzmine_peak_output .travis/neg_data.csv -with-docker galaxydream/metabolomics_pipeline +./nextflow run_aftermzmine.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 --pos_mzmine_peak_output .travis/pos_data.csv --neg_mzmine_peak_output .travis/neg_data.csv -with-docker xinsongdu/lemaslab_rump:v1.0.0 diff --git a/.travis/RUMP-test_all.sh b/.travis/RUMP-test_all.sh index 1c5eed0..b6dc535 100644 --- a/.travis/RUMP-test_all.sh +++ b/.travis/RUMP-test_all.sh @@ -4,5 +4,5 @@ wget https://github.com/mzmine/mzmine2/releases/download/v2.53/MZmine-2.53-Linux.zip && unzip MZmine-2.53-Linux.zip && rm MZmine-2.53-Linux.zip # Test all processes with sample data -./nextflow main.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 -with-docker galaxydream/metabolomics_pipeline +./nextflow main.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 -with-docker xinsongdu/lemaslab_rump:v1.0.0 diff --git a/Dockerfile b/Dockerfile index ea4fa77..ad0a387 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ -# Dockerfile for UMPIRE +# Dockerfile for RUMP -FROM rocker/r-ver:3.5.2 +FROM rocker/rstudio:3.6.3 MAINTAINER xinsongdu@ufl.edu @@ -58,7 +58,11 @@ WORKDIR /app COPY accessibility.properties /app # Fix a bug for java -RUN mv accessibility.properties /etc/java-8-openjdk/ +# RUN mv accessibility.properties /etc/java-8-openjdk/ + +# install R packages +COPY r_package_install.R /app +RUN Rscript r_package_install.R # Install mummichog RUN pip install --upgrade setuptools diff --git a/README.md b/README.md index d375821..5044168 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ wget https://github.com/mzmine/mzmine2/releases/download/v2.53/MZmine-2.53-Linux ``` 4. Pull singularity image if using high-performance computing (**if using local machine, skip this step**) ``` -mkdir -p work/singularity && singularity pull --name work/singularity/xinsongdu-lemaslab_reump.img docker://xinsongdu/lemaslab_rump:v0.0.0 +mkdir -p work/singularity && singularity pull --name work/singularity/xinsongdu-lemaslab_reump.img docker://xinsongdu/lemaslab_rump:v1.0.0 ``` # General Behavior @@ -78,11 +78,11 @@ Negative mode: - Create design files for positve data and negative data, indicating the group of each file, save them to `data/pos_design.csv` and `data/neg_design.csv`. Sample design file can be found in `data/sample_data/pos_design.csv` and `data/sample_data/neg_design.csv` - Process your data with default parameters using local machine ``` -nextflow main.nf -with-docker xinsongdu/lemaslab_rump:v0.0.0 +nextflow main.nf -with-docker xinsongdu/lemaslab_rump:v1.0.0 ``` - Process your data with default parameters using high-performance computing (It is recommended to maximize CPU and memory in pos_peakDetection_mzmine and neg_peakDetection_mzmine processes in `nextflow.config` if using high-performance computing) ``` -nextflow main.nf --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v0.0.0 +nextflow main.nf --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v1.0.0 ``` ### Process dataframe generatd by MZmine-2.53 @@ -91,7 +91,7 @@ nextflow main.nf --container singularity -with-singularity docker://xinsongdu/le - Create design files describing the group of each column of positive/negative data, save them to `data/pos_design.csv` and `data/neg_design.csv` - Get statistical analysis and pathway analysis ``` -nextflow run_aftermzmine.nf -with-docker xinsongdu/lemaslab_rump:v0.0.0 +nextflow run_aftermzmine.nf -with-docker xinsongdu/lemaslab_rump:v1.0.0 ``` ### Help message @@ -113,7 +113,7 @@ Check https://github.com/lemaslab/RUMP for updates, and refer to https://github.com/lemaslab/RUMP/wiki Usage: - nextflow run_all.nf [options] -with-docker xinsongdu/lemaslab_rump:v0.0.0 + nextflow run_all.nf [options] -with-docker xinsongdu/lemaslab_rump:v1.0.0 Arguments (it is mandatory to change `input_file` and `mzmine_dir` before running: ----------------------------- common parameters ---------------------------------- @@ -128,7 +128,7 @@ Please refer to nextflow.config for more options. Container: Docker image to use with -with-docker|-with-singularity options is - 'docker://xinsongdu/lemaslab_rump:v0.0.0' + 'docker://xinsongdu/lemaslab_rump:v1.0.0' RUMP supports .mzXML format files. ``` @@ -163,13 +163,13 @@ RUMP returns the following exit status values: ### Running tests on local machine ``` -nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv -with-docker xinsongdu/lemaslab_rump:v0.0.0 +nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv -with-docker xinsongdu/lemaslab_rump:v1.0.0 ``` ### Running tests on high-performance computing ``` -nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v0.0.0 +nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v1.0.0 ``` # Bug reporting and feature requests diff --git a/main.nf b/main.nf index 871e7c5..cf89723 100644 --- a/main.nf +++ b/main.nf @@ -83,6 +83,10 @@ MQC_CONFIG = Channel.fromPath(params.mqc_config) PYTHON_MUMMICHOG_INPUT_PREPARE = Channel.fromPath(params.python_mummichog_input_prepare) PYTHON_MUMMICHOG_INPUT_PREPARE.into{PYTHON_MUMMICHOG_INPUT_PREPARE_NOBG; PYTHON_MUMMICHOG_INPUT_PREPARE_WITHBG} +// R code for unknown search +R_UNKNOWN_SEARCH = Channel.fromPath(params.r_unknown_search) +R_UNKNOWN_SEARCH.into{R_UNKNOWN_SEARCH_NOBG, R_UNKNOWN_SEARCH_WITHBG} + // Result files used by MultiQC to generate report. // MQC_DIR = Channel.fromPath(params.mqc_dir, type: 'dir') @@ -143,7 +147,7 @@ if (params.help) { exit 1 } -// Unit tests +// Check appropriateness of input process input_check { echo true @@ -278,8 +282,8 @@ process add_stats { """ } -POS_DATA_NOBG.into{POS_NOBG_FOR_BS; POS_NOBG_FOR_MQC; POS_NOBG_FOR_PCA; POS_NOBG_FOR_HCLUSTERING; POS_NOBG_FOR_VD; POS_NOBG_FOR_BARPLOT; POS_NOBG_FOR_MUMMICHOG} -NEG_DATA_NOBG.into{NEG_NOBG_FOR_BS; NEG_NOBG_FOR_MQC; NEG_NOBG_FOR_PCA; NEG_NOBG_FOR_HCLUSTERING; NEG_NOBG_FOR_VD; NEG_NOBG_FOR_BARPLOT; NEG_NOBG_FOR_MUMMICHOG} +POS_DATA_NOBG.into{POS_NOBG_FOR_BS; POS_NOBG_FOR_MQC; POS_NOBG_FOR_PCA; POS_NOBG_FOR_HCLUSTERING; POS_NOBG_FOR_VD; POS_NOBG_FOR_BARPLOT; POS_NOBG_FOR_MUMMICHOG; POS_NOBG_FOR_UNKNOWN_SEARCH} +NEG_DATA_NOBG.into{NEG_NOBG_FOR_BS; NEG_NOBG_FOR_MQC; NEG_NOBG_FOR_PCA; NEG_NOBG_FOR_HCLUSTERING; NEG_NOBG_FOR_VD; NEG_NOBG_FOR_BARPLOT; NEG_NOBG_FOR_MUMMICHOG; NEG_NOBG_FOR_UNKNOWN_SEARCH} // Background subtraction process blank_subtraction { @@ -311,8 +315,8 @@ process blank_subtraction { // split channel content for multiple-time use -POS_DATA_WITHBG.into{POS_WITHBG_FOR_MQC; POS_WITHBG_FOR_PCA; POS_WITHBG_FOR_HCLUSTERING; POS_WITHBG_FOR_VD; POS_WITHBG_FOR_BARPLOT; POS_WITHBG_FOR_MUMMICHOG} -NEG_DATA_WITHBG.into{NEG_WITHBG_FOR_MQC; NEG_WITHBG_FOR_PCA; NEG_WITHBG_FOR_HCLUSTERING; NEG_WITHBG_FOR_VD; NEG_WITHBG_FOR_BARPLOT; NEG_WITHBG_FOR_MUMMICHOG} +POS_DATA_WITHBG.into{POS_WITHBG_FOR_MQC; POS_WITHBG_FOR_PCA; POS_WITHBG_FOR_HCLUSTERING; POS_WITHBG_FOR_VD; POS_WITHBG_FOR_BARPLOT; POS_WITHBG_FOR_MUMMICHOG; POS_WITHBG_FOR_UNKNOWN_SEARCH} +NEG_DATA_WITHBG.into{NEG_WITHBG_FOR_MQC; NEG_WITHBG_FOR_PCA; NEG_WITHBG_FOR_HCLUSTERING; NEG_WITHBG_FOR_VD; NEG_WITHBG_FOR_BARPLOT; NEG_WITHBG_FOR_MUMMICHOG; NEG_WITHBG_FOR_UNKNOWN_SEARCH} // Process for generating files that can be parsed by MultiQC regarding peak numbers of different steps. process mqc_peak_number_comparison { @@ -568,6 +572,55 @@ process bar_plot_withbg { } +// unknown search for metabolites identified before blank subtraction +process unknown_search_nobg { + + publishDir './results/peak_table/', mode: 'copy' + + input: + file data_pos from POS_NOBG_FOR_UNKNOWN_SEARCH + file data_neg from NEG_NOBG_FOR_UNKNOWN_SEARCH + file r_unknown_search from R_UNKNOWN_SEARCH_NOBG + + output: + file params.unknown_search_pos_nobg into UNKNOWN_SEARCH_POS_NOBG + file params.unknown_search_neg_nobg into UNKNOWN_SEARCH_NEG_NOBG + + shell: + """ + Rscript ${r_unknown_search} -i ${data_pos} -n positive -c ${params.mz_col_pos_nobg} -o ${params.unknown_search_pos_nobg} && + Rscript ${r_unknown_search} -i ${data_neg} -n negative -c ${params.mz_col_neg_nobg} -o ${params.unknown_search_neg_nobg} + + """ + +} + +// unknown search for metabolites identified after blank subtraction +process unknown_search_withbg { + + publishDir './results/peak_table/', mode: 'copy' + + input: + file data_pos from POS_WITHBG_FOR_UNKNOWN_SEARCH + file data_neg from NEG_WITHBG_FOR_UNKNOWN_SEARCH + file r_unknown_search from R_UNKNOWN_SEARCH_WITHBG + + output: + file params.unknown_search_pos_withbg into UNKNOWN_SEARCH_POS_WITHBG + file params.unknown_search_neg_withbg into UNKNOWN_SEARCH_NEG_WITHBG + + when: + params.bs == "1" + + shell: + """ + Rscript ${r_unknown_search} -i ${data_pos} -n positive -c ${params.mz_col_pos_withbg} -o ${params.unknown_search_pos_withbg} && + Rscript ${r_unknown_search} -i ${data_neg} -n negative -c ${params.mz_col_neg_withbg} -o ${params.unknown_search_neg_withbg} + + """ + +} + process mqc_figs { publishDir './results/mqc/', mode: 'copy' diff --git a/nextflow.config b/nextflow.config index 7981297..8c14b51 100644 --- a/nextflow.config +++ b/nextflow.config @@ -106,8 +106,8 @@ params python_barplot = "./rump/bar_plot.py" data_info = "./rump/data_info.py" peak_number_comparison_path = "./rump/peak_number_comparison.py" - python_bs = "./rump/blank_subtraction.py" + r_unknown_search = "./rump/unknown_search.R" mqc_dir = "./results/mqc/" experiments_info = "./rump/software_descriptions_mqc.txt" @@ -193,6 +193,14 @@ params barplot_neg_withbg = "neg_barplot_group1_withbg.png" barplot_neg_withbg_om = "neg_onlymatched_barplot_group1_withbg.png" + // outputs for unknown_search_nobg + unknown_search_pos_nobg = "unknown_search_pos_nobg.csv" + unknown_search_neg_nobg = "unknown_search_neg_nobg.csv" + + // outputs for unknown_search_withbg + unknown_search_pos_withbg = "unknown_search_pos_withbg.csv" + unknown_search_neg_withbg = "unknown_search_neg_withbg.csv" + // regarding mummichog python_mummichog_input_prepare = "./rump/mummichog_input_prepare.py" data_pos_nobg_group1_mummichog = "data_pos_nobg_group1_mummichog.txt" diff --git a/r_package_install.R b/r_package_install.R new file mode 100755 index 0000000..262f7b3 --- /dev/null +++ b/r_package_install.R @@ -0,0 +1,4 @@ +# install neccessary packages +list.of.packages <- c("cmmr", "optparse") +new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] +if(length(new.packages)) install.packages(new.packages) \ No newline at end of file diff --git a/rump/unknown_search.R b/rump/unknown_search.R new file mode 100755 index 0000000..7654ae4 --- /dev/null +++ b/rump/unknown_search.R @@ -0,0 +1,57 @@ +# 2018.12.19. ask +rm(list=ls(all=TRUE)) + +# 20 Digits Precision Representation +options(scipen=20) + +# Setting the correct working directory. +# NOTE!!! -> Can be linked differently on different computers. +# setwd("/Users/xinsongdu/mnt/projects/beach01/secimtools") +# Extra check +getwd() +setwd("/Users/xinsongdu/mnt/projects/HumanVSBovine_Milk/results_HumanVSBovine/peak_table") + +library(optparse) # add this library to enable argparse arguments +library(cmmr) +options(warn=-1) + +## Define input and output arguments +option_list = list( + make_option(c("-i", "--input"), type="character", default="bovine_enriched_unknown.csv", + help="input data file"), + make_option(c("-c", "--mz_col"), type="character", default="row.m.z", + help="column name indicating m/z values"), + make_option(c("-n", "--ion"), type="character", default="positive", + help="ion mode"), + make_option(c("-o", "--output"), type="character", default="searched_unknown_pos_after_blank_subtraction.csv", + help="output csv file name") +); + +opt_parser = OptionParser(option_list=option_list); +opt = parse_args(opt_parser); + +# read data +data <- read.csv(file=opt$input) + +# extract mz values from data +mzs = as.vector(data[['row.m.z']]) +# mzs = lapply(mzs,round,4) + +if (opt$ion=="negative"){ + adduct <- "negative" +} else { + adduct <- "positive" +} + +# batch search +batch_df <- batch_search('http://ceumass.eps.uspceu.es/mediator/api/v3/batch', + 'all-except-peptides', + '["all-except-mine"]', + 'mz', + opt$ion, + adduct, + 5, + 'ppm', + mzs) +data_merge <- merge(data, batch_df, by.x='row.m.z', by.y='experimental_mass') +write.csv(data_merge, opt$output, row.names=TRUE) diff --git a/run_aftermzmine.nf b/run_aftermzmine.nf index 46aeec6..c6dee17 100644 --- a/run_aftermzmine.nf +++ b/run_aftermzmine.nf @@ -80,6 +80,10 @@ NEG_DESIGN.into{NEG_DESIGN_FOR_AS; NEG_DESIGN_FOR_BS; NEG_DESIGN_FOR_PCA_NOBG; N // EXPERIMENTS_INFO = Channel.fromPath(params.experiments_info) // MQC_CONFIG = Channel.fromPath(params.mqc_config) +// R code for unknown search +R_UNKNOWN_SEARCH = Channel.fromPath(params.r_unknown_search) +R_UNKNOWN_SEARCH.into{R_UNKNOWN_SEARCH_NOBG, R_UNKNOWN_SEARCH_WITHBG} + // Result files used by MultiQC to generate report. // MQC_DIR = Channel.fromPath(params.mqc_dir, type: 'dir') @@ -183,8 +187,8 @@ process add_stats { } // split channel content for multiple-time use -POS_DATA_NOBG.into{POS_NOBG_FOR_BS; POS_NOBG_FOR_MQC; POS_NOBG_FOR_PCA; POS_NOBG_FOR_HCLUSTERING; POS_NOBG_FOR_VD; POS_NOBG_FOR_BARPLOT; POS_NOBG_FOR_MUMMICHOG} -NEG_DATA_NOBG.into{NEG_NOBG_FOR_BS; NEG_NOBG_FOR_MQC; NEG_NOBG_FOR_PCA; NEG_NOBG_FOR_HCLUSTERING; NEG_NOBG_FOR_VD; NEG_NOBG_FOR_BARPLOT; NEG_NOBG_FOR_MUMMICHOG} +POS_DATA_NOBG.into{POS_NOBG_FOR_BS; POS_NOBG_FOR_MQC; POS_NOBG_FOR_PCA; POS_NOBG_FOR_HCLUSTERING; POS_NOBG_FOR_VD; POS_NOBG_FOR_BARPLOT; POS_NOBG_FOR_MUMMICHOG; POS_NOBG_FOR_UNKNOWN_SEARCH} +NEG_DATA_NOBG.into{NEG_NOBG_FOR_BS; NEG_NOBG_FOR_MQC; NEG_NOBG_FOR_PCA; NEG_NOBG_FOR_HCLUSTERING; NEG_NOBG_FOR_VD; NEG_NOBG_FOR_BARPLOT; NEG_NOBG_FOR_MUMMICHOG; NEG_NOBG_FOR_UNKNOWN_SEARCH} // Background subtraction process blank_subtraction { @@ -211,8 +215,8 @@ process blank_subtraction { } // split channel content for multiple-time use -POS_DATA_WITHBG.into{POS_WITHBG_FOR_MQC; POS_WITHBG_FOR_PCA; POS_WITHBG_FOR_HCLUSTERING; POS_WITHBG_FOR_VD; POS_WITHBG_FOR_BARPLOT; POS_WITHBG_FOR_MUMMICHOG} -NEG_DATA_WITHBG.into{NEG_WITHBG_FOR_MQC; NEG_WITHBG_FOR_PCA; NEG_WITHBG_FOR_HCLUSTERING; NEG_WITHBG_FOR_VD; NEG_WITHBG_FOR_BARPLOT; NEG_WITHBG_FOR_MUMMICHOG} +POS_DATA_WITHBG.into{POS_WITHBG_FOR_MQC; POS_WITHBG_FOR_PCA; POS_WITHBG_FOR_HCLUSTERING; POS_WITHBG_FOR_VD; POS_WITHBG_FOR_BARPLOT; POS_WITHBG_FOR_MUMMICHOG; POS_WITHBG_FOR_UNKNOWN_SEARCH} +NEG_DATA_WITHBG.into{NEG_WITHBG_FOR_MQC; NEG_WITHBG_FOR_PCA; NEG_WITHBG_FOR_HCLUSTERING; NEG_WITHBG_FOR_VD; NEG_WITHBG_FOR_BARPLOT; NEG_WITHBG_FOR_MUMMICHOG; NEG_WITHBG_FOR_UNKNOWN_SEARCH} // Process for generating files that can be parsed by MultiQC regarding peak numbers of different steps. process mqc_peak_number_comparison { @@ -456,6 +460,55 @@ process bar_plot_withbg { } +// unknown search for metabolites identified before blank subtraction +process unknown_search_nobg { + + publishDir './results/peak_table/', mode: 'copy' + + input: + file data_pos from POS_NOBG_FOR_UNKNOWN_SEARCH + file data_neg from NEG_NOBG_FOR_UNKNOWN_SEARCH + file r_unknown_search from R_UNKNOWN_SEARCH_NOBG + + output: + file params.unknown_search_pos_nobg into UNKNOWN_SEARCH_POS_NOBG + file params.unknown_search_neg_nobg into UNKNOWN_SEARCH_NEG_NOBG + + shell: + """ + Rscript ${r_unknown_search} -i ${data_pos} -n positive -c ${params.mz_col_pos_nobg} -o ${params.unknown_search_pos_nobg} && + Rscript ${r_unknown_search} -i ${data_neg} -n negative -c ${params.mz_col_neg_nobg} -o ${params.unknown_search_neg_nobg} + + """ + +} + +// unknown search for metabolites identified after blank subtraction +process unknown_search_withbg { + + publishDir './results/peak_table/', mode: 'copy' + + input: + file data_pos from POS_WITHBG_FOR_UNKNOWN_SEARCH + file data_neg from NEG_WITHBG_FOR_UNKNOWN_SEARCH + file r_unknown_search from R_UNKNOWN_SEARCH_WITHBG + + output: + file params.unknown_search_pos_withbg into UNKNOWN_SEARCH_POS_WITHBG + file params.unknown_search_neg_withbg into UNKNOWN_SEARCH_NEG_WITHBG + + when: + params.bs == "1" + + shell: + """ + Rscript ${r_unknown_search} -i ${data_pos} -n positive -c ${params.mz_col_pos_withbg} -o ${params.unknown_search_pos_withbg} && + Rscript ${r_unknown_search} -i ${data_neg} -n negative -c ${params.mz_col_neg_withbg} -o ${params.unknown_search_neg_withbg} + + """ + +} + if (params.container != "Docker") { MAT_CONFIG_DIR = Channel.from('~/.config/matplotlib/') MAT_CONFIG_FILE = Channel.from('~/.config/matplotlib/matplotlibrc') From 410d31b709fc3bbd6d50f22e563bbdad097a99ce Mon Sep 17 00:00:00 2001 From: GalaxyDream Date: Wed, 22 Apr 2020 22:49:14 -0400 Subject: [PATCH 2/9] debug --- main.nf | 2 +- run_aftermzmine.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index cf89723..dc67ac3 100644 --- a/main.nf +++ b/main.nf @@ -85,7 +85,7 @@ PYTHON_MUMMICHOG_INPUT_PREPARE.into{PYTHON_MUMMICHOG_INPUT_PREPARE_NOBG; PYTHON_ // R code for unknown search R_UNKNOWN_SEARCH = Channel.fromPath(params.r_unknown_search) -R_UNKNOWN_SEARCH.into{R_UNKNOWN_SEARCH_NOBG, R_UNKNOWN_SEARCH_WITHBG} +R_UNKNOWN_SEARCH.into{R_UNKNOWN_SEARCH_NOBG; R_UNKNOWN_SEARCH_WITHBG} // Result files used by MultiQC to generate report. // MQC_DIR = Channel.fromPath(params.mqc_dir, type: 'dir') diff --git a/run_aftermzmine.nf b/run_aftermzmine.nf index c6dee17..f6e63c3 100644 --- a/run_aftermzmine.nf +++ b/run_aftermzmine.nf @@ -82,7 +82,7 @@ NEG_DESIGN.into{NEG_DESIGN_FOR_AS; NEG_DESIGN_FOR_BS; NEG_DESIGN_FOR_PCA_NOBG; N // R code for unknown search R_UNKNOWN_SEARCH = Channel.fromPath(params.r_unknown_search) -R_UNKNOWN_SEARCH.into{R_UNKNOWN_SEARCH_NOBG, R_UNKNOWN_SEARCH_WITHBG} +R_UNKNOWN_SEARCH.into{R_UNKNOWN_SEARCH_NOBG; R_UNKNOWN_SEARCH_WITHBG} // Result files used by MultiQC to generate report. // MQC_DIR = Channel.fromPath(params.mqc_dir, type: 'dir') From 3a423f0a255ede3b6a5ca50e90b3761f558f041b Mon Sep 17 00:00:00 2001 From: GalaxyDream Date: Wed, 22 Apr 2020 23:31:15 -0400 Subject: [PATCH 3/9] debug --- rump/unknown_search.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/rump/unknown_search.R b/rump/unknown_search.R index 7654ae4..01cf561 100755 --- a/rump/unknown_search.R +++ b/rump/unknown_search.R @@ -7,9 +7,6 @@ options(scipen=20) # Setting the correct working directory. # NOTE!!! -> Can be linked differently on different computers. # setwd("/Users/xinsongdu/mnt/projects/beach01/secimtools") -# Extra check -getwd() -setwd("/Users/xinsongdu/mnt/projects/HumanVSBovine_Milk/results_HumanVSBovine/peak_table") library(optparse) # add this library to enable argparse arguments library(cmmr) From d6dd5398fe6d68975e22b2fe0be89b32a1835873 Mon Sep 17 00:00:00 2001 From: GalaxyDream Date: Thu, 23 Apr 2020 00:45:32 -0400 Subject: [PATCH 4/9] debug --- nextflow.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nextflow.config b/nextflow.config index 8c14b51..8b82984 100644 --- a/nextflow.config +++ b/nextflow.config @@ -109,6 +109,11 @@ params python_bs = "./rump/blank_subtraction.py" r_unknown_search = "./rump/unknown_search.R" + mz_col_pos_nobg = "row.m.z" + mz_col_neg_nobg = "row.m.z" + mz_col_pos_withbg = "row.m.z" + mz_col_neg_withbg = "row.m.z" + mqc_dir = "./results/mqc/" experiments_info = "./rump/software_descriptions_mqc.txt" mqc_config = "./rump/multiqc_config.yaml" From ae6bde20e632fda1bfc9d9e92f6fff3f9a9d5d83 Mon Sep 17 00:00:00 2001 From: GalaxyDream Date: Thu, 23 Apr 2020 11:49:56 -0400 Subject: [PATCH 5/9] debug --- rump/unknown_search.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rump/unknown_search.R b/rump/unknown_search.R index 01cf561..b314899 100755 --- a/rump/unknown_search.R +++ b/rump/unknown_search.R @@ -35,9 +35,9 @@ mzs = as.vector(data[['row.m.z']]) # mzs = lapply(mzs,round,4) if (opt$ion=="negative"){ - adduct <- "negative" + adduct <- '["M-H"]' } else { - adduct <- "positive" + adduct <- '["M+H"]' } # batch search From 8fd2c0b7da749d14e8d2c218029e970aea1bd5b3 Mon Sep 17 00:00:00 2001 From: GalaxyDream Date: Thu, 23 Apr 2020 23:15:04 -0400 Subject: [PATCH 6/9] debug --- rump/unknown_search.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rump/unknown_search.R b/rump/unknown_search.R index b314899..fd18809 100755 --- a/rump/unknown_search.R +++ b/rump/unknown_search.R @@ -1,5 +1,5 @@ # 2018.12.19. ask -rm(list=ls(all=TRUE)) +# rm(list=ls(all=TRUE)) # 20 Digits Precision Representation options(scipen=20) @@ -50,5 +50,10 @@ batch_df <- batch_search('http://ceumass.eps.uspceu.es/mediator/api/v3/batch', 5, 'ppm', mzs) -data_merge <- merge(data, batch_df, by.x='row.m.z', by.y='experimental_mass') +if (nrow(batch_df)==0){ + data_merge <- data.frame(Empty=character()) +} else { + data_merge <- merge(data, batch_df, by.x='row.m.z', by.y='experimental_mass') +} + write.csv(data_merge, opt$output, row.names=TRUE) From 3828b895b60f25bd7e4e5a12e69351815391ea59 Mon Sep 17 00:00:00 2001 From: GalaxyDream Date: Thu, 23 Apr 2020 23:19:11 -0400 Subject: [PATCH 7/9] debug --- rump/unknown_search.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rump/unknown_search.R b/rump/unknown_search.R index fd18809..44b5331 100755 --- a/rump/unknown_search.R +++ b/rump/unknown_search.R @@ -49,8 +49,8 @@ batch_df <- batch_search('http://ceumass.eps.uspceu.es/mediator/api/v3/batch', adduct, 5, 'ppm', - mzs) -if (nrow(batch_df)==0){ + c(0.00)) +if (typeof(batch_df)=="character"){ data_merge <- data.frame(Empty=character()) } else { data_merge <- merge(data, batch_df, by.x='row.m.z', by.y='experimental_mass') From aed944bcd7a58479f87414ea1557069e12e5455e Mon Sep 17 00:00:00 2001 From: GalaxyDream Date: Fri, 24 Apr 2020 00:17:33 -0400 Subject: [PATCH 8/9] debug --- nextflow.config | 12 ++++++++++++ rump/unknown_search.R | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 8b82984..785a044 100644 --- a/nextflow.config +++ b/nextflow.config @@ -301,6 +301,18 @@ process cpus = 1 memory = '4 GB' } + withName: unknown_search_nobg + { + time = '15m' + cpus = 1 + memory = '4 GB' + } + withName: unknown_search_withbg + { + time = '15m' + cpus = 1 + memory = '4 GB' + } withName: mqc_figs { time = '15m' diff --git a/rump/unknown_search.R b/rump/unknown_search.R index 44b5331..1fee842 100755 --- a/rump/unknown_search.R +++ b/rump/unknown_search.R @@ -49,7 +49,7 @@ batch_df <- batch_search('http://ceumass.eps.uspceu.es/mediator/api/v3/batch', adduct, 5, 'ppm', - c(0.00)) + mzs) if (typeof(batch_df)=="character"){ data_merge <- data.frame(Empty=character()) } else { From 358fe47d9ac120e8e5ece88020a3484fa6790b6e Mon Sep 17 00:00:00 2001 From: GalaxyDream Date: Fri, 24 Apr 2020 01:37:22 -0400 Subject: [PATCH 9/9] adjust comments and rerun the tests --- .travis/RUMP-test_aftermzmine.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis/RUMP-test_aftermzmine.sh b/.travis/RUMP-test_aftermzmine.sh index ff99e65..910262f 100644 --- a/.travis/RUMP-test_aftermzmine.sh +++ b/.travis/RUMP-test_aftermzmine.sh @@ -1,2 +1,2 @@ -# Test processes after MZmine with sample data +# Test processes MZmine output files with sample data ./nextflow run_aftermzmine.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 --pos_mzmine_peak_output .travis/pos_data.csv --neg_mzmine_peak_output .travis/neg_data.csv -with-docker xinsongdu/lemaslab_rump:v1.0.0