diff --git a/.github/workflows/wdl_checker.yml b/.github/workflows/wdl_checker.yml index 2618fb5..54499dd 100644 --- a/.github/workflows/wdl_checker.yml +++ b/.github/workflows/wdl_checker.yml @@ -40,4 +40,3 @@ jobs: run: | # Add the commands to run your MiniWDL workflow miniwdl run wdl/metaMS_gcms.wdl -i wdl/metams_input_gcms.json --verbose --no-cache --copy-input-files - \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 9308ee2..481ad02 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ +# Python base image FROM python:3.11.1-bullseye # Mono: 6.12 - RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF \ && echo "deb http://download.mono-project.com/repo/debian buster/snapshots/6.12 main" > /etc/apt/sources.list.d/mono-official.list \ && apt-get update \ @@ -12,17 +12,14 @@ RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E03280 # Pythonnet: 3.0.1 (from PyPI) # Note: pycparser must be installed before pythonnet can be built - RUN pip install pycparser \ && pip install pythonnet==3.0.1 +# Copy MetaMS contents WORKDIR /metams - COPY metaMS/ /metams/metaMS/ COPY README.md disclaimer.txt Makefile requirements.txt setup.py /metams/ COPY db/ /metams/db/ # Install the MetaMS package in editable mode RUN pip3 install --editable . 
- - diff --git a/configuration/gcms_corems.toml b/configuration/gcms_corems.toml index 9a61ded..8487e66 100644 --- a/configuration/gcms_corems.toml +++ b/configuration/gcms_corems.toml @@ -1,12 +1,25 @@ [MolecularSearch] -url_database = "sqlite:////metams/db/pnnl_lowres_gcms_compounds.sqlite" -ri_search_range = 35 +ri_search_range = 35.0 rt_search_range = 1.0 correlation_threshold = 0.5 score_threshold = 0.0 ri_spacing = 200.0 ri_std = 3.0 -ri_calibration_compound_names = [ " [C8] Methyl Caprylate [7.812]", " [C10] Methyl Caprate [10.647]", " [C9] Methyl Pelargonate [9.248]", " [C12] Methyl Laurate [13.250]", " [C14] Methyl Myristate [15.597]", " [C16] Methyl Palmitate [17.723]", " [C18] Methyl Stearate [19.663]", " [C20] Methyl Eicosanoate [21.441]", " [C22] Methyl Docosanoate [23.082]", " [C24] Methyl Linocerate [24.603]", " [C26] Methyl Hexacosanoate [26.023]", " [C28] Methyl Octacosanoate [27.349]", " [C30] Methyl Triacontanoate [28.72]",] +ri_calibration_compound_names = [ + "Methyl Caprylate", + "Methyl Caprate", + "Methyl Pelargonate", + "Methyl Laurate", + "Methyl Myristate", + "Methyl Palmitate", + "Methyl Stearate", + "Methyl Eicosanoate", + "Methyl Docosanoate", + "Methyl Linocerate", + "Methyl Hexacosanoate", + "Methyl Octacosanoate", + "Methyl Triacontanoate", +] exploratory_mode = false score_methods = [ "highest_sim_score", "highest_ss",] output_score_method = "All" @@ -17,12 +30,13 @@ implemented_smooth_method = [ "savgol", "hanning", "blackman", "bartlett", "flat smooth_window = 5 smooth_method = "savgol" savgol_pol_order = 2 +peak_derivative_threshold = 0.0005 peak_height_max_percent = 10.0 peak_max_prominence_percent = 1.0 min_peak_datapoints = 5.0 max_peak_width = 0.1 noise_threshold_method = "manual_relative_abundance" -implemented_noise_threshold_methods = [ "auto_relative_abundance", "manual_relative_abundance", "second_derivative",] +noise_threshold_methods_implemented = [ "auto_relative_abundance", "manual_relative_abundance", 
"second_derivative",] std_noise_threshold = 3 peak_height_min_percent = 0.1 peak_min_prominence_percent = 0.1 diff --git a/configuration/gcms_metams.toml b/configuration/gcms_metams.toml index c92ef11..eed996c 100644 --- a/configuration/gcms_metams.toml +++ b/configuration/gcms_metams.toml @@ -1,7 +1,8 @@ file_paths = [ "data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf", "data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf"] calibration_file_path = "data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf" -corems_toml_path = "configuration/corems.toml" +corems_toml_path = "configuration/gcms_corems.toml" nmdc_metadata_path = "configuration/nmdc_metadata.json" output_directory = "output" output_filename = "output" output_type = "csv" +metabref_token_path = "configuration/metabref.token" diff --git a/metaMS/cli.py b/metaMS/cli.py index a6f719e..e7d50cc 100644 --- a/metaMS/cli.py +++ b/metaMS/cli.py @@ -1,9 +1,7 @@ -from multiprocessing import Pool from pathlib import Path import click import toml - from corems.encapsulation.output.parameter_to_json import dump_gcms_settings_toml from metaMS.gcmsWorkflow import ( @@ -12,6 +10,7 @@ run_gcms_metabolomics_workflow_wdl, run_nmdc_metabolomics_workflow, ) + from metaMS.lcms_lipidomics_workflow import ( LipidomicsWorkflowParameters, run_lcms_lipidomics_workflow, @@ -32,6 +31,7 @@ def cli(): @click.argument("output_type", required=True, type=str) @click.argument("corems_toml_path", required=True, type=str) @click.argument("nmdc_metadata_path", required=True, type=str) +@click.argument("metabref_token_path", required=True, type=str) @click.option("--jobs", "-j", default=4, help="'cpu's'") def run_gcms_wdl_workflow( file_paths, @@ -41,6 +41,7 @@ def run_gcms_wdl_workflow( output_type, corems_toml_path, nmdc_metadata_path, + metabref_token_path, jobs, ): """Run the GCMS workflow\n @@ -57,6 +58,7 @@ def run_gcms_wdl_workflow( output_filename, output_type, corems_toml_path, + metabref_token_path, jobs, ) diff --git a/metaMS/gcmsWorkflow.py 
b/metaMS/gcmsWorkflow.py index fbc6aab..49c8d7e 100644 --- a/metaMS/gcmsWorkflow.py +++ b/metaMS/gcmsWorkflow.py @@ -1,37 +1,112 @@ +import cProfile from dataclasses import dataclass from multiprocessing import Pool from pathlib import Path import toml - -from corems.mass_spectra.input.andiNetCDF import ReadAndiNetCDF from corems.encapsulation.input import parameter_from_json from corems.mass_spectra.calc.GC_RI_Calibration import get_rt_ri_pairs +from corems.mass_spectra.input.andiNetCDF import ReadAndiNetCDF +from corems.molecular_id.factory.EI_SQL import EI_LowRes_SQLite from corems.molecular_id.search.compoundSearch import LowResMassSpectralMatch +from corems.molecular_id.search.database_interfaces import MetabRefGCInterface -import cProfile @dataclass class WorkflowParameters: - - file_paths: tuple = ('data/...', 'data/...') - #RI FAMES Calibration File - calibration_file_path: str = 'data/...' - #Sample/Process Metadata - nmdc_metadata_path: str = 'configuration/nmdc_metadata.json' - #configuration file for corems - corems_toml_path: str = 'configuration/corems.toml' - output_directory: str = 'data/...' - output_filename: str = 'data/...' - output_type: str = 'csv' - + """ + Data class to establish workflow parameters. + + Parameters + ---------- + file_paths : tuple(str) + Paths to files to process. + calibration_reference_path : str + FAMEs retention index reference SQLite database. + calibration_file_path : str + FAMEs retention index calibration filepath. + nmdc_metadata_path : str + Sample and processing metadata. + corems_toml_path : str + CoreMS configuration. + output_directory : str + Path to save outputs. + output_filename : str + Output filename. + output_type : + Output extension. + + """ + + # Filepaths to process + file_paths: tuple = ("data/...", "data/...") + + # RI FAMEs calibration files + calibration_reference_path: str = "data/..." + calibration_file_path: str = "data/..." 
+ + # Sample/Process Metadata + nmdc_metadata_path: str = "configuration/nmdc_metadata.json" + + # Configuration file for corems + corems_toml_path: str = "configuration/corems.toml" + output_directory: str = "data/..." + output_filename: str = "data/..." + output_type: str = "csv" + + # Token + metabref_token_path: str = "configuration/..." + + def worker(args): + """ + Wraps `workflow_worker` using cProfile. + + """ + + cProfile.runctx("workflow_worker(args)", globals(), locals(), "gc-ms.prof") + + +def run_gcms_metabolomics_workflow_wdl( + file_paths, + calibration_file_path, + output_directory, + output_filename, + output_type, + corems_toml_path, + metabref_token_path, + jobs, + db_path=None, +): + """ + GCMS metabolomics workflow with WDL. - cProfile.runctx('workflow_worker(args)', globals(), locals(), 'gc-ms.prof') + Parameters + ---------- + file_paths : str + Comma-separated paths to files to process. + calibration_file_path : str + FAMEs retention index calibration filepath. + output_directory : str + Path to save outputs. + output_filename : str + Output filename. + output_type : str + Output type; selects the `gcms.to_<output_type>` export method. + corems_toml_path : str + Path to CoreMS configuration file. + metabref_token_path : str + Path to token used to authenticate MetabRef database access. + jobs : int + Number of concurrent jobs. + db_path : str, optional + Path to database (currently unused). 
+ + """ -def run_gcms_metabolomics_workflow_wdl(file_paths, calibration_file_path, output_directory,output_filename, output_type, corems_toml_path, jobs, db_path=None): - import click + + # Store workflow parameters workflow_params = WorkflowParameters() workflow_params.file_paths = file_paths.split(",") workflow_params.calibration_file_path = calibration_file_path @@ -39,147 +114,321 @@ def run_gcms_metabolomics_workflow_wdl(file_paths, calibration_file_path, output workflow_params.output_filename = output_filename workflow_params.output_type = output_type workflow_params.corems_toml_path = corems_toml_path - + workflow_params.metabref_token_path = metabref_token_path + + # Load CoreMS settings + click.echo("Loading CoreMS settings from %s" % workflow_params.corems_toml_path) + + # Create output directory dirloc = Path(workflow_params.output_directory) dirloc.mkdir(exist_ok=True) - output_path = Path(workflow_params.output_directory)/workflow_params.output_filename - - rt_ri_pairs = get_calibration_rtri_pairs(workflow_params.calibration_file_path, workflow_params.corems_toml_path) - worker_args = [(file_path, rt_ri_pairs, workflow_params.corems_toml_path, workflow_params.calibration_file_path ) for file_path in workflow_params.file_paths] - #gcms_list = pool.map(workflow_worker, worker_args) - pool = Pool(int(jobs)) - - for i, gcms in enumerate(pool.imap_unordered(workflow_worker, worker_args), 1): - eval('gcms.to_'+ workflow_params.output_type + '(output_path)') + # Determine output filepath + output_path = ( + Path(workflow_params.output_directory) / workflow_params.output_filename + ) + + # Load FAMEs calibration data + gcms_cal_obj = get_gcms( + workflow_params.calibration_file_path, workflow_params.corems_toml_path + ) + + # Load FAMEs calibration reference + MetabRefGCInterface().set_token(workflow_params.metabref_token_path) + fames_ref_sql = MetabRefGCInterface().get_fames(format="sql") + + # Compute RT:RI pairs + rt_ri_pairs = 
get_rt_ri_pairs(gcms_cal_obj, sql_obj=fames_ref_sql) + + # Prepare worker arguments + worker_args = [ + ( + file_path, + rt_ri_pairs, + workflow_params.corems_toml_path, + workflow_params.calibration_file_path, + ) + for file_path in workflow_params.file_paths + ] + + # Create multiprocess pool + with Pool(int(jobs)) as pool: + # Map workflow over inputs + for i, gcms in enumerate(pool.imap_unordered(workflow_worker, worker_args), 1): + eval("gcms.to_" + workflow_params.output_type + "(output_path)") - pool.close() - pool.join() def run_nmdc_metabolomics_workflow(workflow_params_file, jobs): - + """ + NMDC metabolomics workflow. + + Parameters + ---------- + workflow_params_file : str + Path to workflow parameters file. + jobs : int + Number of concurrent jobs. + + """ + import click - dms_file_path = 'db/GC-MS Metabolomics Experiments to Process Final.xlsx' - - click.echo('Loading Searching Settings from %s' % workflow_params_file) - workflow_params = read_workflow_parameter(workflow_params_file) - + + # [HARDCODED, UNUSED] Path to DMS file path? 
+ dms_file_path = "db/GC-MS Metabolomics Experiments to Process Final.xlsx" + + # Load workflow settings + click.echo("Loading search settings from %s" % workflow_params_file) + workflow_params = load_workflow_parameters(workflow_params_file) + + # Create output directory dirloc = Path(workflow_params.output_directory) dirloc.mkdir(exist_ok=True) - - rt_ri_pairs = get_calibration_rtri_pairs(workflow_params.calibration_file_path, workflow_params.corems_toml_path) - - worker_args = [(file_path, rt_ri_pairs, workflow_params.corems_toml_path, workflow_params.calibration_file_path) for file_path in workflow_params.file_paths] - #gcms_list = pool.map(workflow_worker, worker_args) - pool = Pool(jobs) - - for i, gcms in enumerate(pool.imap_unordered(workflow_worker, worker_args), 1): - - in_file_path = Path(workflow_params.file_paths[i]) - output_path = Path(workflow_params.output_directory)/in_file_path.name - - eval('gcms.to_'+ workflow_params.output_type + '(output_path, write_metadata=False)') - - #nmdc = NMDC_Metadata(in_file_path, workflow_params.calibration_file_path, output_path, dms_file_path) - #nmdc.create_nmdc_metadata(gcms) - - pool.close() - pool.join() - + + # Load FAMEs calibration data + gcms_cal_obj = get_gcms( + workflow_params.calibration_file_path, workflow_params.corems_toml_path + ) + + # Load FAMEs calibration reference + MetabRefGCInterface().set_token(workflow_params.metabref_token_path) + fames_ref_sql = MetabRefGCInterface().get_fames(format='sql') + + # Compute RT:RI pairs + rt_ri_pairs = get_rt_ri_pairs(gcms_cal_obj, sql_obj=fames_ref_sql) + + # Prepare worker arguments + worker_args = [ + ( + file_path, + rt_ri_pairs, + workflow_params.corems_toml_path, + workflow_params.calibration_file_path, + ) + for file_path in workflow_params.file_paths + ] + + # Create multiprocess pool + with Pool(jobs) as pool: + # Map workflow over inputs + for i, gcms in enumerate(pool.imap_unordered(workflow_worker, worker_args)): + # Determine output path. NOTE(review): imap_unordered yields results in completion order, so index i may not correspond to this result's input file - confirm + 
input_path = Path(workflow_params.file_paths[i]) + output_path = Path(workflow_params.output_directory) / input_path.name + + eval( + "gcms.to_" + + workflow_params.output_type + + "(output_path, write_metadata=False)" + ) + + # nmdc = NMDC_Metadata(in_file_path, workflow_params.calibration_file_path, output_path, dms_file_path) + # nmdc.create_nmdc_metadata(gcms) + def run_gcms_metabolomics_workflow(workflow_params_file, jobs): + """ + GC/MS metabolomics workflow. + + Parameters + ---------- + workflow_params_file : str + Path to workflow parameters file. + jobs : int + Number of concurrent jobs. + + """ + import click - click.echo('Loading Searching Settings from %s' % workflow_params_file) - workflow_params = read_workflow_parameter(workflow_params_file) + # Load workflow settings + click.echo("Loading search settings from %s" % workflow_params_file) + workflow_params = load_workflow_parameters(workflow_params_file) + # Create output directory dirloc = Path(workflow_params.output_directory) dirloc.mkdir(exist_ok=True) - output_path = Path(workflow_params.output_directory)/workflow_params.output_filename - - rt_ri_pairs = get_calibration_rtri_pairs(workflow_params.calibration_file_path, workflow_params.corems_toml_path) - - worker_args = [(file_path, rt_ri_pairs, workflow_params.corems_toml_path, workflow_params.calibration_file_path) for file_path in workflow_params.file_paths] - #gcms_list = pool.map(workflow_worker, worker_args) - pool = Pool(jobs) - - for i, gcms in enumerate(pool.imap_unordered(workflow_worker, worker_args), 1): - eval('gcms.to_'+ workflow_params.output_type + '(output_path)') - - pool.close() - pool.join() - -def read_workflow_parameter(gcms_workflow_paramaters_toml_file): - with open(gcms_workflow_paramaters_toml_file, 'r') as infile: - return WorkflowParameters(**toml.load(infile)) - -def get_calibration_rtri_pairs(ref_file_path, corems_paramaters_toml_file): - - gcms_ref_obj = get_gcms(ref_file_path, corems_paramaters_toml_file) - 
#sql_obj = start_sql_from_file() - #rt_ri_pairs = get_rt_ri_pairs(gcms_ref_obj,sql_obj=sql_obj) - # !!!!!! READ !!!!! use the previous two lines if db/EMSL_lowres_gcms_test_database.sqlite does not exist - # and comment the next line - rt_ri_pairs = get_rt_ri_pairs(gcms_ref_obj) - return rt_ri_pairs + + # Determine output filepath + output_path = ( + Path(workflow_params.output_directory) / workflow_params.output_filename + ) + + # Load FAMEs calibration data + gcms_cal_obj = get_gcms( + workflow_params.calibration_file_path, workflow_params.corems_toml_path + ) + + # Load FAMEs calibration reference + MetabRefGCInterface().set_token(workflow_params.metabref_token_path) + fames_ref_sql = MetabRefGCInterface().get_fames(format="sql") + + # Compute RT:RI pairs + rt_ri_pairs = get_rt_ri_pairs(gcms_cal_obj, sql_obj=fames_ref_sql) + + # Prepare worker arguments + worker_args = [ + ( + file_path, + rt_ri_pairs, + workflow_params.corems_toml_path, + workflow_params.calibration_file_path, + ) + for file_path in workflow_params.file_paths + ] + + # Create multiprocess pool + with Pool(jobs) as pool: + # Map workflow over inputs + for i, gcms in enumerate(pool.imap_unordered(workflow_worker, worker_args), 1): + eval("gcms.to_" + workflow_params.output_type + "(output_path)") + + +def read_toml(path): + """ + Read TOML file. + + Parameters + ---------- + path : str + Path to TOML file. + + Returns + ------- + dict + Dictionary of parameter:value pairs. + + """ + + with open(path, "r", encoding="utf8") as stream: + return toml.load(stream) + + +def load_workflow_parameters(path): + """ + Load workflow configuration parameters from file. + + Parameters + ---------- + path : str + Path to parameters file. + + Returns + ------- + :obj:`WorkflowParameters` + Data class containing workflow parameters. + + """ + + return WorkflowParameters(**read_toml(path)) + + +def load_corems_parameters(path): + """ + Load CoreMS configuration parameters from file. 
+ + Parameters + ---------- + path : str + Path to parameters file. + + Returns + ------- + dict + Dictionary of parameter:value pairs. + + """ + + return read_toml(path) + def workflow_worker(args): - - file_path, ref_dict, corems_params, cal_file_path = args - - gcms = get_gcms(file_path, corems_params) - + """ + Wrap data processing functionality for parallel execution. Loads GC data, + applies calibration, performs spectral search. + + Parameters + ---------- + args : tuple + Arguments fed to worker. + + Returns + ------- + gcms + GCMS object. + + """ + + # Unpack arguments + file_path, ref_dict, corems_params_file, cal_file_path = args + + # Load data + gcms = get_gcms(file_path, corems_params_file) + + # Calibrate retention indices gcms.calibrate_ri(ref_dict, cal_file_path) - - # sql_obj = start_sql_from_file() - # lowResSearch = LowResMassSpectralMatch(gcms, sql_obj=sql_obj) - # !!!!!! READ !!!!! use the previous two lines if db/pnnl_lowres_gcms_compounds.sqlite does not exist - # and comment the next line - lowResSearch = LowResMassSpectralMatch(gcms) + + # Load reference database + ref_db_sql = MetabRefGCInterface().get_library(format="sql") + + # Perform search + lowResSearch = LowResMassSpectralMatch(gcms, sql_obj=ref_db_sql) lowResSearch.run() return gcms + def get_gcms(file_path, corems_params): - + """ + Convenience function to load and process file according to CoreMS configuration + parameters. 
+ + Parameters + ---------- + + """ + + # Read NetCDF file reader_gcms = ReadAndiNetCDF(file_path) - + + # Process data reader_gcms.run() - + + # Export to GCMS object gcms = reader_gcms.get_gcms_obj() - parameter_from_json.load_and_set_toml_parameters_gcms(gcms, parameters_path=corems_params) - + # Set parameters from file + parameter_from_json.load_and_set_toml_parameters_gcms( + gcms, parameters_path=corems_params + ) + + # Process chromatogram gcms.process_chromatogram() - return gcms -def start_sql_from_file(): - - from pathlib import Path - from corems.molecular_id.input.nistMSI import ReadNistMSI - - ref_lib_path = Path("data/PNNLMetV20191015.MSL") - if ref_lib_path.exists: - sql_obj = ReadNistMSI(ref_lib_path).get_sqlLite_obj() - return sql_obj - - -def run_gcms_mpi(workflow_params_file, replicas, rt_ri_pairs): - - import os, sys - sys.path.append(os.getcwd()) - from mpi4py import MPI - - workflow_params = read_workflow_parameter(workflow_params_file) - rt_ri_pairs = get_calibration_rtri_pairs(workflow_params.calibration_file_path, workflow_params.corems_toml_path) - worker_args = [(file_path, rt_ri_pairs, workflow_params.corems_toml_path, workflow_params.calibration_file_path) for file_path in workflow_params.file_paths] - - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - size = comm.Get_size() - - # will only run tasks up to the number of files paths selected in the EnviroMS File - if rank < len(worker_args): - workflow_worker(worker_args[rank]) \ No newline at end of file + +# def run_gcms_mpi(workflow_params_file, replicas, rt_ri_pairs): + +# import os, sys +# sys.path.append(os.getcwd()) +# from mpi4py import MPI + +# workflow_params = load_workflow_parameters(workflow_params_file) + +# gcms_cal_obj = get_gcms(workflow_params.calibration_file_path, +# workflow_params.corems_toml_path) +# sql_obj = EI_LowRes_SQLite(url="sqlite:///db/MetabRef_FAMEs_EILowRes_20240816.db") +# rt_ri_pairs = get_rt_ri_pairs(gcms_cal_obj, sql_obj=sql_obj) + +# worker_args = 
[(file_path, rt_ri_pairs, workflow_params.corems_toml_path, workflow_params.calibration_file_path) for file_path in workflow_params.file_paths] + +# comm = MPI.COMM_WORLD +# rank = comm.Get_rank() +# size = comm.Get_size() + +# # will only run tasks up to the number of files paths selected in the EnviroMS File +# if rank < len(worker_args): +# workflow_worker(worker_args[rank]) + + +# if diff --git a/metaMS/lcms_lipidomics_workflow.py b/metaMS/lcms_lipidomics_workflow.py index c72052f..7511d31 100644 --- a/metaMS/lcms_lipidomics_workflow.py +++ b/metaMS/lcms_lipidomics_workflow.py @@ -5,7 +5,6 @@ from multiprocessing import Pool from corems.mass_spectra.input.mzml import MZMLSpectraParser -from corems.mass_spectra.input.rawFileReader import ImportMassSpectraThermoMSFileReader @dataclass class LipidomicsWorkflowParameters: @@ -55,7 +54,7 @@ def instantiate_lcms_obj(file_in): # Instantiate parser based on binary file type if ".raw" in str(file_in): #TODO KRH: Add real functionality here - pass + from corems.mass_spectra.input.rawFileReader import ImportMassSpectraThermoMSFileReader #parser = ImportMassSpectraThermoMSFileReader(file_in) if ".mzML" in str(file_in): diff --git a/requirements.txt b/requirements.txt index edeb4e1..8a2bc52 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -corems==3.0.2 +corems>=3.0.2 Click>=7.1.1 requests nmdc-schema>=7.0.0 \ No newline at end of file diff --git a/wdl/metaMS_gcms.wdl b/wdl/metaMS_gcms.wdl index 2c812f9..b7fbd12 100644 --- a/wdl/metaMS_gcms.wdl +++ b/wdl/metaMS_gcms.wdl @@ -19,6 +19,7 @@ task runMetaMSGCMS { String output_type File corems_toml_path File nmdc_metadata_path + File metabref_token_path Int jobs_count } @@ -31,6 +32,7 @@ task runMetaMSGCMS { ${output_type} \ ${corems_toml_path} \ ${nmdc_metadata_path} \ + ${metabref_token_path} \ --jobs ${jobs_count} } @@ -41,6 +43,7 @@ task runMetaMSGCMS { } runtime { - docker: "microbiomedata/metams:2.2.2" + docker: "local-metams:latest" + #TODO KRH: Change 
to dockerhub version after we've pushed the updated image } } \ No newline at end of file diff --git a/wdl/metams_input_gcms.json b/wdl/metams_input_gcms.json index 4b1cf0f..cd6a3f9 100644 --- a/wdl/metams_input_gcms.json +++ b/wdl/metams_input_gcms.json @@ -1,15 +1,14 @@ { - "gcmsMetabolomics.runMetaMSGCMS.file_paths": [ - "./data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf", - "./data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf" - ], - - "gcmsMetabolomics.runMetaMSGCMS.calibration_file_path": "./data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf", - "gcmsMetabolomics.runMetaMSGCMS.output_directory": "test_output", - "gcmsMetabolomics.runMetaMSGCMS.output_filename": "test_dataset", - "gcmsMetabolomics.runMetaMSGCMS.output_type": "csv", - "gcmsMetabolomics.runMetaMSGCMS.corems_toml_path": "./configuration/gcms_corems.toml", - "gcmsMetabolomics.runMetaMSGCMS.nmdc_metadata_path": "./configuration/nmdc_metadata.json", - "gcmsMetabolomics.runMetaMSGCMS.jobs_count": 4 - + "gcmsMetabolomics.runMetaMSGCMS.file_paths": [ + "./data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf", + "./data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf" +], +"gcmsMetabolomics.runMetaMSGCMS.calibration_file_path": "./data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf", +"gcmsMetabolomics.runMetaMSGCMS.output_directory": "test_output", +"gcmsMetabolomics.runMetaMSGCMS.output_filename": "test_dataset", +"gcmsMetabolomics.runMetaMSGCMS.output_type": "csv", +"gcmsMetabolomics.runMetaMSGCMS.corems_toml_path": "./configuration/gcms_corems.toml", +"gcmsMetabolomics.runMetaMSGCMS.nmdc_metadata_path": "./configuration/nmdc_metadata.json", +"gcmsMetabolomics.runMetaMSGCMS.metabref_token_path": "./configuration/metabref.token", +"gcmsMetabolomics.runMetaMSGCMS.jobs_count": 4 } \ No newline at end of file