Skip to content

Commit

Permalink
Refactor to interface with MetabRef database
Browse files Browse the repository at this point in the history
  • Loading branch information
smcolby committed Dec 13, 2024
1 parent f2a3ca8 commit 6e884c3
Show file tree
Hide file tree
Showing 6 changed files with 5 additions and 23 deletions.
2 changes: 1 addition & 1 deletion configuration/gcms_corems.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[MolecularSearch]
url_database = "sqlite:///db/MetabRef_Library_EILowRes_20240816.db"
url_database = "sqlite:///db/MetabRef_Library_EILowRes_20240816.db" # No longer needed: the workflow now loads the reference library via MetabRefGCInterface instead of this SQLite URL
ri_search_range = 35.0
rt_search_range = 1.0
correlation_threshold = 0.5
Expand Down
1 change: 0 additions & 1 deletion configuration/gcms_metams.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
file_paths = [ "data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf", "data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf"]
calibration_reference_path = "sqlite:///db/MetabRef_FAMEs_EILowRes_20240816.db"
calibration_file_path = "data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf"
corems_toml_path = "configuration/gcms_corems.toml"
nmdc_metadata_path = "configuration/nmdc_metadata.json"
Expand Down
3 changes: 0 additions & 3 deletions metaMS/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def cli():

@cli.command()
@click.argument("file_paths", required=True, type=str)
@click.argument("calibration_reference_path", required=True, type=str)
@click.argument("calibration_file_path", required=True, type=str)
@click.argument("output_directory", required=True, type=str)
@click.argument("output_filename", required=True, type=str)
Expand All @@ -35,7 +34,6 @@ def cli():
@click.option("--jobs", "-j", default=4, help="'cpu's'")
def run_gcms_wdl_workflow(
file_paths,
calibration_reference_path,
calibration_file_path,
output_directory,
output_filename,
Expand All @@ -53,7 +51,6 @@ def run_gcms_wdl_workflow(
click.echo("Running gcms workflow")
run_gcms_metabolomics_workflow_wdl(
file_paths,
calibration_reference_path,
calibration_file_path,
output_directory,
output_filename,
Expand Down
19 changes: 4 additions & 15 deletions metaMS/gcmsWorkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from corems.mass_spectra.input.andiNetCDF import ReadAndiNetCDF
from corems.molecular_id.factory.EI_SQL import EI_LowRes_SQLite
from corems.molecular_id.search.compoundSearch import LowResMassSpectralMatch
from corems.molecular_id.search.database_interfaces import MetabRefGCInterface


@dataclass
Expand Down Expand Up @@ -81,8 +82,6 @@ def run_gcms_metabolomics_workflow_wdl(
----------
file_paths : tuple(str)
Paths to files to process.
calibration_reference_path : str
FAMEs retention index calibration reference filepath.
calibration_file_path : str
FAMEs retention index calibration filepath.
output_directory : str
Expand All @@ -105,7 +104,6 @@ def run_gcms_metabolomics_workflow_wdl(
# Store workflow parameters
workflow_params = WorkflowParameters()
workflow_params.file_paths = file_paths.split(",")
workflow_params.calibration_reference_path = calibration_reference_path
workflow_params.calibration_file_path = calibration_file_path
workflow_params.output_directory = output_directory
workflow_params.output_filename = output_filename
Expand All @@ -130,9 +128,7 @@ def run_gcms_metabolomics_workflow_wdl(
)

# Load FAMEs calibration reference
fames_ref_sql = EI_LowRes_SQLite(
url=workflow_params.calibration_reference_path
)
fames_ref_sql = MetabRefGCInterface().get_fames(format="sql")

# Compute RT:RI pairs
rt_ri_pairs = get_rt_ri_pairs(gcms_cal_obj, sql_obj=fames_ref_sql)
Expand Down Expand Up @@ -257,9 +253,7 @@ def run_gcms_metabolomics_workflow(workflow_params_file, jobs):
)

# Load FAMEs calibration reference
fames_ref_sql = EI_LowRes_SQLite(
url=workflow_params.calibration_reference_path
)
fames_ref_sql = MetabRefGCInterface().get_fames(format="sql")

# Compute RT:RI pairs
rt_ri_pairs = get_rt_ri_pairs(gcms_cal_obj, sql_obj=fames_ref_sql)
Expand Down Expand Up @@ -360,19 +354,14 @@ def workflow_worker(args):
# Unpack arguments
file_path, ref_dict, corems_params_file, cal_file_path = args

# Load CoreMS parameters
corems_parameters = load_corems_parameters(corems_params_file)

# Load data
gcms = get_gcms(file_path, corems_params_file)

# Calibrate retention indices
gcms.calibrate_ri(ref_dict, cal_file_path)

# Load reference database
ref_db_sql = EI_LowRes_SQLite(
url=corems_parameters["MolecularSearch"]["url_database"]
)
ref_db_sql = MetabRefGCInterface().get_library(format="sql")

# Perform search
lowResSearch = LowResMassSpectralMatch(gcms, sql_obj=ref_db_sql)
Expand Down
2 changes: 0 additions & 2 deletions wdl/metaMS_gcms.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ workflow gcmsMetabolomics {
task runMetaMSGCMS {
input {
Array[File] file_paths
File calibration_reference_path
File calibration_file_path
String output_directory
String output_filename
Expand All @@ -26,7 +25,6 @@ task runMetaMSGCMS {
command {
metaMS run-gcms-wdl-workflow \
${sep=',' file_paths} \
${calibration_reference_path} \
${calibration_file_path} \
${output_directory} \
${output_filename} \
Expand Down
1 change: 0 additions & 1 deletion wdl/metams_input_gcms.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"./data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf",
"./data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf"
],
"gcmsMetabolomics.runMetaMS.calibration_reference_path": "sqlite:///db/MetabRef_FAMEs_EILowRes_20240816.db",
"gcmsMetabolomics.runMetaMS.calibration_file_path": "./data/raw_data/GCMS_FAMES_01_GCMS-01_20191023.cdf",
"gcmsMetabolomics.runMetaMS.output_directory": "test_output",
"gcmsMetabolomics.runMetaMS.output_filename": "test_dataset",
Expand Down

0 comments on commit 6e884c3

Please sign in to comment.