Skip to content

Commit

Permalink
Merge branch 'add_lipidworkflow_holder' into 'lipid_dev'
Browse files Browse the repository at this point in the history
Add holder lipid workflow wdl etc

See merge request mass-spectrometry/metams!6
  • Loading branch information
kheal committed Nov 21, 2024
2 parents e9aea68 + c7b14b0 commit f980552
Show file tree
Hide file tree
Showing 11 changed files with 694 additions and 52 deletions.
19 changes: 17 additions & 2 deletions .github/workflows/wdl_checker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,27 @@ jobs:
with:
python-version: '3.8' # specify the Python version you need

# Steps run in order: install Docker, build the local image, install MiniWDL,
# then run the lipidomics and GC-MS workflows once each.
- name: Install Docker
  run: |
    curl -fsSL https://get.docker.com -o get-docker.sh
    sh get-docker.sh
    sudo usermod -aG docker $USER
- name: Build Docker Image
  run: |
    docker build -t local-metams:latest .
- name: Install MiniWDL
  run: |
    python -m pip install --upgrade pip
    pip install miniwdl
# The former "Run MiniWDL" step carried only a name (no `run`/`uses`), which is
# not a valid step; it is replaced by the two explicit test steps below.
- name: Test Lipid MiniWDL
  run: |
    miniwdl run wdl/metaMS_lipidomics.wdl -i wdl/metams_input_lipidomics.json --verbose --no-cache --copy-input-files
- name: Test GCMS MiniWDL
  run: |
    miniwdl run wdl/metaMS_gcms.wdl -i wdl/metams_input_gcms.json --verbose --no-cache --copy-input-files
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ __pycache__/
*.so
*config.yaml

# .config.yaml file
.config.yaml

# Distribution / packaging
.Python
Expand Down
22 changes: 20 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,28 @@
# Single-stage build on plain Python; Mono and pythonnet are installed
# explicitly below, so no second base image is needed.
FROM python:3.11.1-bullseye

# Mono: 6.12

RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF \
    && echo "deb http://download.mono-project.com/repo/debian buster/snapshots/6.12 main" > /etc/apt/sources.list.d/mono-official.list \
    && apt-get update \
    && apt-get install -y clang \
    && apt-get install -y mono-devel=6.12\* \
    && rm -rf /var/lib/apt/lists/* /tmp/*


# Pythonnet: 3.0.1 (from PyPI)
# Note: pycparser must be installed before pythonnet can be built

RUN pip install pycparser \
    && pip install pythonnet==3.0.1

WORKDIR /metams

COPY metaMS/ /metams/metaMS/
COPY README.md disclaimer.txt Makefile requirements.txt setup.py /metams/
COPY db/ /metams/db/

# Install the MetaMS package in editable mode (run once; a second identical
# RUN would only add a redundant layer)
RUN pip3 install --editable .


306 changes: 306 additions & 0 deletions configuration/lipid_configs/emsl_lipidomics_corems_params.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
# Settings under the LiquidChromatograph table. Key order is kept as-is;
# comments only mark the key-name prefixes that group related options.
[LiquidChromatograph]
scans = [-1, -1]
eic_tolerance_ppm = 5.0

# Smoothing: the selected method is one of the entries listed in
# implemented_smooth_method.
smooth_window = 5
smooth_method = "savgol"
implemented_smooth_method = [
    "savgol",
    "hanning",
    "blackman",
    "bartlett",
    "flat",
    "boxcar",
]
savgol_pol_order = 2

# Peak shape and noise thresholds: the selected noise method is one of the
# entries listed in noise_threshold_methods_implemented.
peak_height_max_percent = 10.0
peak_max_prominence_percent = 1.0
peak_derivative_threshold = 0.0005
min_peak_datapoints = 5.0
noise_threshold_method = "manual_relative_abundance"
noise_threshold_methods_implemented = [
    "auto_relative_abundance",
    "manual_relative_abundance",
    "second_derivative",
]
peak_height_min_percent = 0.1
eic_signal_threshold = 0.01

# Peak picking: only one method is listed as implemented, and it is selected.
peak_picking_method = "persistent homology"
implemented_peak_picking_methods = ["persistent homology"]

# Mass-feature clustering and MS1/MS2 options.
# NOTE(review): units of the rt/mz tolerances are not stated here -- confirm.
mass_feature_cluster_mz_tolerance_rel = 5e-6
mass_feature_cluster_rt_tolerance = 0.3
ms1_scans_to_average = 1
ms1_deconvolution_corr_min = 0.95
ms2_dda_rt_tolerance = 0.15
ms2_dda_mz_tolerance = 0.05
ms2_min_fe_score = 0.3
search_as_lipids = true
include_fragment_types = true

# `ph_` options (presumably "persistent homology", matching
# peak_picking_method -- confirm).
ph_inten_min_rel = 0.05
ph_persis_min_rel = 0.005
ph_smooth_it = 0

# Export switches.
export_profile_spectra = false
export_eics = true
export_unprocessed_ms1 = false

# Settings under the MassSpectrum table: noise thresholding, the m/z window
# for picking, and calibration options.
[MassSpectrum]
# The selected method is one of the entries listed in
# noise_threshold_methods_implemented.
noise_threshold_method = "relative_abundance"
noise_threshold_methods_implemented = [
    "minima",
    "signal_noise",
    "relative_abundance",
    "absolute_abundance",
    "log",
]
noise_threshold_min_std = 6
noise_threshold_min_s2n = 4.0
noise_threshold_min_relative_abundance = 1
noise_threshold_absolute_abundance = 1000000.0
noise_threshold_log_nsigma = 6
noise_threshold_log_nsigma_corr_factor = 0.463
noise_threshold_log_nsigma_bins = 500

# `inf` is TOML's positive-infinity float, so both m/z windows have no upper
# bound.
noise_min_mz = 0
noise_max_mz = inf
min_picking_mz = 0
max_picking_mz = inf
picking_point_extrapolate = 3

# Calibration; the ppm error window is symmetric (-1.0 to 1.0).
calib_minimize_method = "Powell"
calib_pol_order = 2
max_calib_ppm_error = 1.0
min_calib_ppm_error = -1.0
calib_sn_threshold = 2.0
do_calibration = true

# Settings under the MassSpecPeak table. Its `kendrick_base` sub-table is
# defined further down in this file.
[MassSpecPeak]
# The selected rounding method is one of the entries listed in
# implemented_kendrick_rounding_methods.
kendrick_rounding_method = "floor"
implemented_kendrick_rounding_methods = ["floor", "ceil", "round"]

peak_derivative_threshold = 0.0
# Minimum and maximum prominence percent carry the same value (0.1) here.
peak_min_prominence_percent = 0.1
min_peak_datapoints = 5.0
peak_max_prominence_percent = 0.1
peak_height_max_percent = 10.0
legacy_resolving_power = false

# Settings under the MS1MolecularSearch table. The `usedAtoms` and
# `used_atom_valences` sub-tables are defined further down in this file.
[MS1MolecularSearch]
use_isotopologue_filter = false
isotopologue_filter_threshold = 33.0
isotopologue_filter_atoms = ["Cl", "Br"]
use_runtime_kendrick_filter = false
use_min_peaks_filter = true
min_peaks_per_class = 15

# Database access; the URL is an empty string in this table.
url_database = ""
db_jobs = 3
db_chunk_size = 300

# Charge and element-ratio / DBE filter bounds.
ion_charge = -1
min_hc_filter = 0.3
max_hc_filter = 3.0
min_oc_filter = 0.0
max_oc_filter = 5
min_op_filter = 2.0
use_pah_line_rule = false
min_dbe = 0
max_dbe = 50

# Scoring: the two weights add up to 1.0, and the selected score_method is one
# of the entries listed in score_methods.
mz_error_score_weight = 0.6
isotopologue_score_weight = 0.4
adduct_atoms_neg = ["Cl", "Br"]
adduct_atoms_pos = ["Na", "K"]
score_methods = [
    "S_P_lowest_error",
    "N_S_P_lowest_error",
    "lowest_error",
    "prob_score",
    "air_filter_error",
    "water_filter_error",
    "earth_filter_error",
]
score_method = "prob_score"
output_min_score = 0.1
output_score_method = "All Candidates"

# Ion-type switches; no ion types are excluded (empty array).
isRadical = false
isProtonated = true
isAdduct = false
ion_types_excluded = []
ionization_type = "ESI"

# Error windows; the ppm window is symmetric (-5 to 5).
min_ppm_error = -5
max_ppm_error = 5
min_abun_error = -100.0
max_abun_error = 100.0
mz_error_range = 1.5
# The string "None" (not a TOML null -- TOML has no null type).
error_method = "None"
mz_error_average = 0.0

# Settings under the MS2MolecularSearch table. The `usedAtoms` and
# `used_atom_valences` sub-tables are defined further down in this file.
[MS2MolecularSearch]
use_isotopologue_filter = false
isotopologue_filter_threshold = 33.0
isotopologue_filter_atoms = ["Cl", "Br"]
use_runtime_kendrick_filter = false
use_min_peaks_filter = true
min_peaks_per_class = 15

# Database access.
# NOTE(review): this connection URL embeds a user name and password in plain
# text; consider supplying it from the environment instead of committing it.
url_database = "postgresql+psycopg2://coremsappdb:coremsapppnnl@molformdb:5432/coremsapp"
db_jobs = 3
db_chunk_size = 300

# Charge and element-ratio / DBE filter bounds.
ion_charge = -1
min_hc_filter = 0.3
max_hc_filter = 3.0
min_oc_filter = 0.0
max_oc_filter = 1.2
min_op_filter = 2.0
use_pah_line_rule = false
min_dbe = 0.0
max_dbe = 40.0

# Scoring: the two weights add up to 1.0, and the selected score_method is one
# of the entries listed in score_methods.
mz_error_score_weight = 0.6
isotopologue_score_weight = 0.4
adduct_atoms_neg = ["Cl", "Br"]
adduct_atoms_pos = ["Na", "K"]
score_methods = [
    "S_P_lowest_error",
    "N_S_P_lowest_error",
    "lowest_error",
    "prob_score",
    "air_filter_error",
    "water_filter_error",
    "earth_filter_error",
]
score_method = "prob_score"
output_min_score = 0.1
output_score_method = "All Candidates"

# Ion-type switches; one ion type is excluded.
isRadical = false
isProtonated = true
isAdduct = false
ion_types_excluded = ["[M+HCOO]-"]
ionization_type = "ESI"

# Error windows; the ppm window is symmetric (-10.0 to 10.0).
min_ppm_error = -10.0
max_ppm_error = 10.0
min_abun_error = -100.0
max_abun_error = 100.0
mz_error_range = 1.5
# The string "None" (not a TOML null -- TOML has no null type).
error_method = "None"
mz_error_average = 0.0

# Sub-table of MassSpecPeak mapping element symbols to integer counts
# (C = 1, H = 2). NOTE(review): presumably the CH2 Kendrick base -- confirm.
[MassSpecPeak.kendrick_base]
C = 1
H = 2

# Sub-table of MS1MolecularSearch: one two-integer array per element symbol.
# NOTE(review): each pair looks like [min, max] atom counts -- confirm against
# the consumer.
[MS1MolecularSearch.usedAtoms]
C = [10, 100]
H = [18, 200]
N = [0, 3]
P = [0, 1]
O = [1, 23]
S = [0, 1]
Na = [0, 1]
Cl = [0, 1]

# Sub-table of MS1MolecularSearch: one integer per element symbol (plus the
# isotope-style key `13C`, which is a valid bare key because digits are allowed).
# The entries match MS2MolecularSearch.used_atom_valences in this file line for
# line; TOML has no aliasing, so the two tables must be kept in sync by hand.
[MS1MolecularSearch.used_atom_valences]
C = 4
13C = 4
N = 3
O = 2
S = 2
H = 1
F = 1
Cl = 1
Br = 1
I = 1
At = 1
Li = 1
Na = 1
K = 1
Rb = 1
Cs = 1
Fr = 1
B = 4
In = 3
Al = 3
P = 3
Ga = 3
Mg = 2
Be = 2
Ca = 2
Sr = 2
Ba = 2
Ra = 2
V = 5
Fe = 3
Si = 4
Sc = 3
Ti = 4
Cr = 1
Mn = 1
Co = 1
Ni = 1
Cu = 2
Zn = 2
Ge = 4
As = 5
Se = 6
Y = 3
Zr = 4
Nb = 5
Mo = 6
Tc = 7
Ru = 8
Rh = 6
Pd = 4
Ag = 0
Cd = 2
Sn = 4
Sb = 5
Te = 6
La = 3
Hf = 4
Ta = 5
W = 6
Re = 4
Os = 4
Ir = 4
Pt = 4
Au = 3
Hg = 1
Tl = 3
Pb = 4
Bi = 3
Po = 2
Ac = 3

# Sub-table of MS2MolecularSearch: one two-integer array per element symbol;
# only C and H are listed here.
# NOTE(review): each pair looks like [min, max] atom counts -- confirm against
# the consumer.
[MS2MolecularSearch.usedAtoms]
C = [1, 100]
H = [1, 200]

# Sub-table of MS2MolecularSearch: one integer per element symbol (plus the
# isotope-style key `13C`, which is a valid bare key because digits are allowed).
# The entries match MS1MolecularSearch.used_atom_valences in this file line for
# line; TOML has no aliasing, so the two tables must be kept in sync by hand.
[MS2MolecularSearch.used_atom_valences]
C = 4
13C = 4
N = 3
O = 2
S = 2
H = 1
F = 1
Cl = 1
Br = 1
I = 1
At = 1
Li = 1
Na = 1
K = 1
Rb = 1
Cs = 1
Fr = 1
B = 4
In = 3
Al = 3
P = 3
Ga = 3
Mg = 2
Be = 2
Ca = 2
Sr = 2
Ba = 2
Ra = 2
V = 5
Fe = 3
Si = 4
Sc = 3
Ti = 4
Cr = 1
Mn = 1
Co = 1
Ni = 1
Cu = 2
Zn = 2
Ge = 4
As = 5
Se = 6
Y = 3
Zr = 4
Nb = 5
Mo = 6
Tc = 7
Ru = 8
Rh = 6
Pd = 4
Ag = 0
Cd = 2
Sn = 4
Sb = 5
Te = 6
La = 3
Hf = 4
Ta = 5
W = 6
Re = 4
Os = 4
Ir = 4
Pt = 4
Au = 3
Hg = 1
Tl = 3
Pb = 4
Bi = 3
Po = 2
Ac = 3
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Entry named `ms2`: scan filter "hcd" paired with resolution "high".
# NOTE(review): presumably maps a scan label to the filter/resolution used to
# select those scans -- confirm against the code that reads this file.
[ms2]
scan_filter = "hcd"
resolution = "high"

# Entry named `ms2_cid`: scan filter "cid" paired with resolution "low".
# NOTE(review): presumably maps a scan label to the filter/resolution used to
# select those scans -- confirm against the code that reads this file.
[ms2_cid]
scan_filter = "cid"
resolution = "low"
6 changes: 6 additions & 0 deletions configuration/lipidomics_metams.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Root-level keys (no table header). All paths are written as relative paths.
# NOTE(review): presumably resolved against the working directory of the run --
# confirm.
directory = "data/raw_data/lipid_data"
output_directory = "output"
# Points at the CoreMS parameter file added under configuration/lipid_configs/.
corems_toml_path = "configuration/lipid_configs/emsl_lipidomics_corems_params.toml"
# NOTE(review): by its name this file holds an access token; make sure it is
# not committed -- confirm it is covered by .gitignore.
metabref_token_path = "metabref.token"
scan_translator_path = "configuration/lipid_configs/emsl_lipidomics_scan_translator.toml"
cores = 1
Loading

0 comments on commit f980552

Please sign in to comment.