Merge pull request #35 from lemaslab/xinsong
add unknown search
XinsongDu authored Apr 24, 2020
2 parents 7bac1ab + 358fe47 commit 701cad5
Showing 9 changed files with 222 additions and 24 deletions.
4 changes: 2 additions & 2 deletions .travis/RUMP-test_aftermzmine.sh
@@ -1,2 +1,2 @@
# Test processes after MZmine with sample data
./nextflow run_aftermzmine.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 --pos_mzmine_peak_output .travis/pos_data.csv --neg_mzmine_peak_output .travis/neg_data.csv -with-docker galaxydream/metabolomics_pipeline
# Test processes MZmine output files with sample data
./nextflow run_aftermzmine.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 --pos_mzmine_peak_output .travis/pos_data.csv --neg_mzmine_peak_output .travis/neg_data.csv -with-docker xinsongdu/lemaslab_rump:v1.0.0
2 changes: 1 addition & 1 deletion .travis/RUMP-test_all.sh
@@ -4,5 +4,5 @@
wget https://github.com/mzmine/mzmine2/releases/download/v2.53/MZmine-2.53-Linux.zip && unzip MZmine-2.53-Linux.zip && rm MZmine-2.53-Linux.zip

# Test all processes with sample data
./nextflow main.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 -with-docker galaxydream/metabolomics_pipeline
./nextflow main.nf --input_dir_pos .travis/data/POS/ --input_dir_neg .travis/data/NEG --POS_design_path .travis/pos_design.csv --NEG_design_path .travis/neg_design.csv --cutoff 1 -with-docker xinsongdu/lemaslab_rump:v1.0.0

10 changes: 7 additions & 3 deletions Dockerfile
@@ -1,6 +1,6 @@
# Dockerfile for UMPIRE
# Dockerfile for RUMP

FROM rocker/r-ver:3.5.2
FROM rocker/rstudio:3.6.3

MAINTAINER [email protected]

@@ -58,7 +58,11 @@ WORKDIR /app
COPY accessibility.properties /app

# Fix a bug for java
RUN mv accessibility.properties /etc/java-8-openjdk/
# RUN mv accessibility.properties /etc/java-8-openjdk/

# install R packages
COPY r_package_install.R /app
RUN Rscript r_package_install.R

# Install mummichog
RUN pip install --upgrade setuptools
16 changes: 8 additions & 8 deletions README.md
@@ -29,7 +29,7 @@ wget https://github.com/mzmine/mzmine2/releases/download/v2.53/MZmine-2.53-Linux
```
4. Pull singularity image if using high-performance computing (**if using local machine, skip this step**)
```
mkdir -p work/singularity && singularity pull --name work/singularity/xinsongdu-lemaslab_reump.img docker://xinsongdu/lemaslab_rump:v0.0.0
mkdir -p work/singularity && singularity pull --name work/singularity/xinsongdu-lemaslab_reump.img docker://xinsongdu/lemaslab_rump:v1.0.0
```

# General Behavior
@@ -78,11 +78,11 @@ Negative mode:
- Create design files for positive data and negative data, indicating the group of each file, and save them to `data/pos_design.csv` and `data/neg_design.csv`. Sample design files can be found in `data/sample_data/pos_design.csv` and `data/sample_data/neg_design.csv`
- Process your data with default parameters using local machine
```
nextflow main.nf -with-docker xinsongdu/lemaslab_rump:v0.0.0
nextflow main.nf -with-docker xinsongdu/lemaslab_rump:v1.0.0
```
- Process your data with default parameters using high-performance computing (when doing so, it is recommended to maximize CPU and memory for the `pos_peakDetection_mzmine` and `neg_peakDetection_mzmine` processes in `nextflow.config`; see the sketch below the command)
```
nextflow main.nf --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v0.0.0
nextflow main.nf --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v1.0.0
```
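A minimal sketch of such overrides in `nextflow.config` (the CPU and memory values below are illustrative assumptions, not tuned recommendations; set them to whatever your HPC allocation allows):
```
process
{
    // illustrative values only - adjust to your cluster allocation
    withName: pos_peakDetection_mzmine
    {
        cpus = 16
        memory = '32 GB'
    }
    withName: neg_peakDetection_mzmine
    {
        cpus = 16
        memory = '32 GB'
    }
}
```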

### Process dataframes generated by MZmine-2.53
@@ -91,7 +91,7 @@ nextflow main.nf --container singularity -with-singularity docker://xinsongdu/le
- Create design files describing the group of each column of positive/negative data, save them to `data/pos_design.csv` and `data/neg_design.csv`
- Get statistical analysis and pathway analysis
```
nextflow run_aftermzmine.nf -with-docker xinsongdu/lemaslab_rump:v0.0.0
nextflow run_aftermzmine.nf -with-docker xinsongdu/lemaslab_rump:v1.0.0
```

### Help message
@@ -113,7 +113,7 @@ Check https://github.com/lemaslab/RUMP for updates, and refer to
https://github.com/lemaslab/RUMP/wiki
Usage:
nextflow run_all.nf [options] -with-docker xinsongdu/lemaslab_rump:v0.0.0
nextflow run_all.nf [options] -with-docker xinsongdu/lemaslab_rump:v1.0.0
Arguments (it is mandatory to change `input_file` and `mzmine_dir` before running):
----------------------------- common parameters ----------------------------------
@@ -128,7 +128,7 @@ Please refer to nextflow.config for more options.
Container:
Docker image to use with -with-docker|-with-singularity options is
'docker://xinsongdu/lemaslab_rump:v0.0.0'
'docker://xinsongdu/lemaslab_rump:v1.0.0'
RUMP supports .mzXML format files.
```
@@ -163,13 +163,13 @@ RUMP returns the following exit status values:
### Running tests on local machine

```
nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv -with-docker xinsongdu/lemaslab_rump:v0.0.0
nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv -with-docker xinsongdu/lemaslab_rump:v1.0.0
```

### Running tests on high-performance computing

```
nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v0.0.0
nextflow main.nf --input_dir_pos functional_test/sample_data/POS/ --input_dir_neg functional_test/sample_data/NEG --POS_design_path functional_test/sample_data/pos_design.csv --NEG_design_path functional_test/sample_data/neg_design.csv --container singularity -with-singularity docker://xinsongdu/lemaslab_rump:v1.0.0
```

# Bug reporting and feature requests
63 changes: 58 additions & 5 deletions main.nf
@@ -83,6 +83,10 @@ MQC_CONFIG = Channel.fromPath(params.mqc_config)
PYTHON_MUMMICHOG_INPUT_PREPARE = Channel.fromPath(params.python_mummichog_input_prepare)
PYTHON_MUMMICHOG_INPUT_PREPARE.into{PYTHON_MUMMICHOG_INPUT_PREPARE_NOBG; PYTHON_MUMMICHOG_INPUT_PREPARE_WITHBG}

// R code for unknown search
R_UNKNOWN_SEARCH = Channel.fromPath(params.r_unknown_search)
R_UNKNOWN_SEARCH.into{R_UNKNOWN_SEARCH_NOBG; R_UNKNOWN_SEARCH_WITHBG}

// Result files used by MultiQC to generate report.
// MQC_DIR = Channel.fromPath(params.mqc_dir, type: 'dir')

@@ -143,7 +147,7 @@ if (params.help) {
exit 1
}

// Unit tests
// Check appropriateness of input
process input_check {

echo true
@@ -278,8 +282,8 @@ process add_stats {
"""
}

POS_DATA_NOBG.into{POS_NOBG_FOR_BS; POS_NOBG_FOR_MQC; POS_NOBG_FOR_PCA; POS_NOBG_FOR_HCLUSTERING; POS_NOBG_FOR_VD; POS_NOBG_FOR_BARPLOT; POS_NOBG_FOR_MUMMICHOG}
NEG_DATA_NOBG.into{NEG_NOBG_FOR_BS; NEG_NOBG_FOR_MQC; NEG_NOBG_FOR_PCA; NEG_NOBG_FOR_HCLUSTERING; NEG_NOBG_FOR_VD; NEG_NOBG_FOR_BARPLOT; NEG_NOBG_FOR_MUMMICHOG}
POS_DATA_NOBG.into{POS_NOBG_FOR_BS; POS_NOBG_FOR_MQC; POS_NOBG_FOR_PCA; POS_NOBG_FOR_HCLUSTERING; POS_NOBG_FOR_VD; POS_NOBG_FOR_BARPLOT; POS_NOBG_FOR_MUMMICHOG; POS_NOBG_FOR_UNKNOWN_SEARCH}
NEG_DATA_NOBG.into{NEG_NOBG_FOR_BS; NEG_NOBG_FOR_MQC; NEG_NOBG_FOR_PCA; NEG_NOBG_FOR_HCLUSTERING; NEG_NOBG_FOR_VD; NEG_NOBG_FOR_BARPLOT; NEG_NOBG_FOR_MUMMICHOG; NEG_NOBG_FOR_UNKNOWN_SEARCH}

// Background subtraction
process blank_subtraction {
@@ -311,8 +315,8 @@ process blank_subtraction {


// split channel content for multiple-time use
POS_DATA_WITHBG.into{POS_WITHBG_FOR_MQC; POS_WITHBG_FOR_PCA; POS_WITHBG_FOR_HCLUSTERING; POS_WITHBG_FOR_VD; POS_WITHBG_FOR_BARPLOT; POS_WITHBG_FOR_MUMMICHOG}
NEG_DATA_WITHBG.into{NEG_WITHBG_FOR_MQC; NEG_WITHBG_FOR_PCA; NEG_WITHBG_FOR_HCLUSTERING; NEG_WITHBG_FOR_VD; NEG_WITHBG_FOR_BARPLOT; NEG_WITHBG_FOR_MUMMICHOG}
POS_DATA_WITHBG.into{POS_WITHBG_FOR_MQC; POS_WITHBG_FOR_PCA; POS_WITHBG_FOR_HCLUSTERING; POS_WITHBG_FOR_VD; POS_WITHBG_FOR_BARPLOT; POS_WITHBG_FOR_MUMMICHOG; POS_WITHBG_FOR_UNKNOWN_SEARCH}
NEG_DATA_WITHBG.into{NEG_WITHBG_FOR_MQC; NEG_WITHBG_FOR_PCA; NEG_WITHBG_FOR_HCLUSTERING; NEG_WITHBG_FOR_VD; NEG_WITHBG_FOR_BARPLOT; NEG_WITHBG_FOR_MUMMICHOG; NEG_WITHBG_FOR_UNKNOWN_SEARCH}

// Process for generating files that can be parsed by MultiQC regarding peak numbers of different steps.
process mqc_peak_number_comparison {
@@ -568,6 +572,55 @@ process bar_plot_withbg {

}

// Unknown search for metabolites identified before blank subtraction
process unknown_search_nobg {

    publishDir './results/peak_table/', mode: 'copy'

    input:
    file data_pos from POS_NOBG_FOR_UNKNOWN_SEARCH
    file data_neg from NEG_NOBG_FOR_UNKNOWN_SEARCH
    file r_unknown_search from R_UNKNOWN_SEARCH_NOBG

    output:
    file params.unknown_search_pos_nobg into UNKNOWN_SEARCH_POS_NOBG
    file params.unknown_search_neg_nobg into UNKNOWN_SEARCH_NEG_NOBG

    shell:
    """
    Rscript ${r_unknown_search} -i ${data_pos} -n positive -c ${params.mz_col_pos_nobg} -o ${params.unknown_search_pos_nobg} &&
    Rscript ${r_unknown_search} -i ${data_neg} -n negative -c ${params.mz_col_neg_nobg} -o ${params.unknown_search_neg_nobg}
    """

}

// Unknown search for metabolites identified after blank subtraction
process unknown_search_withbg {

    publishDir './results/peak_table/', mode: 'copy'

    input:
    file data_pos from POS_WITHBG_FOR_UNKNOWN_SEARCH
    file data_neg from NEG_WITHBG_FOR_UNKNOWN_SEARCH
    file r_unknown_search from R_UNKNOWN_SEARCH_WITHBG

    output:
    file params.unknown_search_pos_withbg into UNKNOWN_SEARCH_POS_WITHBG
    file params.unknown_search_neg_withbg into UNKNOWN_SEARCH_NEG_WITHBG

    when:
    params.bs == "1"

    shell:
    """
    Rscript ${r_unknown_search} -i ${data_pos} -n positive -c ${params.mz_col_pos_withbg} -o ${params.unknown_search_pos_withbg} &&
    Rscript ${r_unknown_search} -i ${data_neg} -n negative -c ${params.mz_col_neg_withbg} -o ${params.unknown_search_neg_withbg}
    """

}

process mqc_figs {

publishDir './results/mqc/', mode: 'copy'
27 changes: 26 additions & 1 deletion nextflow.config
@@ -106,8 +106,13 @@ params
python_barplot = "./rump/bar_plot.py"
data_info = "./rump/data_info.py"
peak_number_comparison_path = "./rump/peak_number_comparison.py"

python_bs = "./rump/blank_subtraction.py"
r_unknown_search = "./rump/unknown_search.R"

mz_col_pos_nobg = "row.m.z"
mz_col_neg_nobg = "row.m.z"
mz_col_pos_withbg = "row.m.z"
mz_col_neg_withbg = "row.m.z"

mqc_dir = "./results/mqc/"
experiments_info = "./rump/software_descriptions_mqc.txt"
@@ -193,6 +198,14 @@ params
barplot_neg_withbg = "neg_barplot_group1_withbg.png"
barplot_neg_withbg_om = "neg_onlymatched_barplot_group1_withbg.png"

// outputs for unknown_search_nobg
unknown_search_pos_nobg = "unknown_search_pos_nobg.csv"
unknown_search_neg_nobg = "unknown_search_neg_nobg.csv"

// outputs for unknown_search_withbg
unknown_search_pos_withbg = "unknown_search_pos_withbg.csv"
unknown_search_neg_withbg = "unknown_search_neg_withbg.csv"

// regarding mummichog
python_mummichog_input_prepare = "./rump/mummichog_input_prepare.py"
data_pos_nobg_group1_mummichog = "data_pos_nobg_group1_mummichog.txt"
@@ -288,6 +301,18 @@ process
cpus = 1
memory = '4 GB'
}
withName: unknown_search_nobg
{
time = '15m'
cpus = 1
memory = '4 GB'
}
withName: unknown_search_withbg
{
time = '15m'
cpus = 1
memory = '4 GB'
}
withName: mqc_figs
{
time = '15m'
4 changes: 4 additions & 0 deletions r_package_install.R
@@ -0,0 +1,4 @@
# install necessary packages
list.of.packages <- c("cmmr", "optparse")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
59 changes: 59 additions & 0 deletions rump/unknown_search.R
@@ -0,0 +1,59 @@
# 2018.12.19. ask
# rm(list=ls(all=TRUE))

# 20 Digits Precision Representation
options(scipen=20)

# Setting the correct working directory.
# NOTE!!! -> Can be linked differently on different computers.
# setwd("/Users/xinsongdu/mnt/projects/beach01/secimtools")

library(optparse) # add this library to enable argparse arguments
library(cmmr)
options(warn=-1)

## Define input and output arguments
option_list = list(
  make_option(c("-i", "--input"), type="character", default="bovine_enriched_unknown.csv",
              help="input data file"),
  make_option(c("-c", "--mz_col"), type="character", default="row.m.z",
              help="column name indicating m/z values"),
  make_option(c("-n", "--ion"), type="character", default="positive",
              help="ion mode"),
  make_option(c("-o", "--output"), type="character", default="searched_unknown_pos_after_blank_subtraction.csv",
              help="output csv file name")
);

opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);

# read data
data <- read.csv(file=opt$input)

# extract m/z values from the column specified by --mz_col (defaults to "row.m.z")
mzs <- as.vector(data[[opt$mz_col]])
# mzs = lapply(mzs,round,4)

# choose the adduct set based on ionization mode
if (opt$ion == "negative"){
  adduct <- '["M-H"]'
} else {
  adduct <- '["M+H"]'
}

# batch search against the CEU Mass Mediator API (arguments follow cmmr::batch_search)
batch_df <- batch_search('http://ceumass.eps.uspceu.es/mediator/api/v3/batch', # API endpoint
                         'all-except-peptides',  # metabolite types to include
                         '["all-except-mine"]',  # databases to query
                         'mz',                   # search by m/z value
                         opt$ion,                # ionization mode
                         adduct,                 # adduct set chosen above
                         5,                      # mass tolerance
                         'ppm',                  # tolerance unit
                         mzs)                    # m/z values from the input table
# if batch_search returned a character value (e.g. an error message) instead of a table, write an empty data frame
if (typeof(batch_df)=="character"){
  data_merge <- data.frame(Empty=character())
} else {
  data_merge <- merge(data, batch_df, by.x=opt$mz_col, by.y='experimental_mass')
}

write.csv(data_merge, opt$output, row.names=TRUE)