Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Switch to FlashLFQ output from psm_utils #186

Merged
merged 6 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/config_schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
- **One of**
- *string*
- *null*
- **`write_flashlfq`** *(boolean)*: Write results to a FlashLFQ-compatible file. Default: `false`.
- **`write_report`** *(boolean)*: Write an HTML report with various QC metrics and charts. Default: `false`.
- **`profile`** *(boolean)*: Write a txt report using cProfile for profiling. Default: `false`.
## Definitions
Expand All @@ -93,7 +94,6 @@
- **`train_fdr`** *(number)*: FDR threshold for training Mokapot. Minimum: `0`. Maximum: `1`. Default: `0.01`.
- **`write_weights`** *(boolean)*: Write Mokapot weights to a text file. Default: `false`.
- **`write_txt`** *(boolean)*: Write Mokapot results to a text file. Default: `false`.
- **`write_flashlfq`** *(boolean)*: Write Mokapot results to a FlashLFQ-compatible file. Default: `false`.
- <a id="definitions/percolator"></a>**`percolator`** *(object)*: Percolator rescoring engine configuration. Can contain additional properties. Refer to *[#/definitions/rescoring_engine](#definitions/rescoring_engine)*.
- **`init-weights`**: Weights file for scoring function. Default: `false`.
- **One of**
Expand Down
3 changes: 0 additions & 3 deletions examples/msgfplus-ms2rescore.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
},
"log_level": "debug",
"processes": 16,
"feature_generators": {
"basic": {}
},
"rescoring_engine": {
"mokapot": {
"fasta_file": "examples/proteins/uniprot-proteome-human-contaminants.fasta",
Expand Down
18 changes: 0 additions & 18 deletions examples/msgfplus-ms2rescore.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,7 @@ psm_reader_kwargs = { "score_column" = "PSMScore" }
log_level = "debug"
processes = 16

# [ms2rescore.modification_mapping]

# [ms2rescore.fixed_modifications]

[ms2rescore.feature_generators.basic]
# No options, but setting heading enables feature generator

# [ms2rescore.feature_generators.ms2pip]
# model = "HCD"
# ms2_tolerance = 0.02

# [ms2rescore.feature_generators.deeplc]
# deeplc_retrain = false

# [ms2rescore.feature_generators.maxquant]
# No options, but setting heading enables feature generator

[ms2rescore.rescoring_engine.mokapot]
fasta_file = "examples/proteins/uniprot-proteome-human-contaminants.fasta"
write_weights = true
write_txt = true
# write_flashlfq = true
10 changes: 10 additions & 0 deletions ms2rescore/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,16 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
logger.info(f"Writing output to {output_file_root}.psms.tsv...")
psm_utils.io.write_file(psm_list, output_file_root + ".psms.tsv", filetype="tsv")

if config["write_flashlfq"]:
logger.info(f"Writing output to {output_file_root}.flashlfq.tsv...")
psm_utils.io.write_file(
psm_list,
output_file_root + ".flashlfq.tsv",
filetype="flashlfq",
fdr_threshold=0.01,  # TODO: Make FDR threshold configurable
only_target=True,
)

# Write report
if config["write_report"]:
try:
Expand Down
33 changes: 19 additions & 14 deletions ms2rescore/gui/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,17 @@ def __init__(self, *args, **kwargs):
)
self.usi.grid(row=1, column=0, pady=(0, 10), sticky="nsew")

self.write_flashlfq = widgets.LabeledSwitch(
self,
label="Write FlashLFQ input file",
description=(
"Write a file that can be used as input for FlashLFQ. This file only contains "
"target PSMs that pass the FDR threshold."
),
wraplength=CONFIG_WIDTH - 180,
)
self.write_flashlfq.grid(row=2, column=0, pady=(0, 10), sticky="nsew")

self.generate_report = widgets.LabeledSwitch(
self,
label="Generate interactive report",
Expand All @@ -380,7 +391,7 @@ def __init__(self, *args, **kwargs):
wraplength=CONFIG_WIDTH - 180,
default=True,
)
self.generate_report.grid(row=2, column=0, pady=(0, 10), sticky="nsew")
self.generate_report.grid(row=3, column=0, pady=(0, 10), sticky="nsew")

self.id_decoy_pattern = widgets.LabeledEntry(
self,
Expand All @@ -392,7 +403,7 @@ def __init__(self, *args, **kwargs):
),
wraplength=CONFIG_WIDTH - 180,
)
self.id_decoy_pattern.grid(row=3, column=0, pady=(0, 10), sticky="nsew")
self.id_decoy_pattern.grid(row=4, column=0, pady=(0, 10), sticky="nsew")

self.psm_id_pattern = widgets.LabeledEntry(
self,
Expand All @@ -404,7 +415,7 @@ def __init__(self, *args, **kwargs):
),
wraplength=CONFIG_WIDTH - 180,
)
self.psm_id_pattern.grid(row=4, column=0, pady=(0, 10), sticky="nsew")
self.psm_id_pattern.grid(row=5, column=0, pady=(0, 10), sticky="nsew")

self.spectrum_id_pattern = widgets.LabeledEntry(
self,
Expand All @@ -414,7 +425,7 @@ def __init__(self, *args, **kwargs):
),
wraplength=CONFIG_WIDTH - 180,
)
self.spectrum_id_pattern.grid(row=5, column=0, pady=(0, 10), sticky="nsew")
self.spectrum_id_pattern.grid(row=6, column=0, pady=(0, 10), sticky="nsew")

self.processes = widgets.LabeledOptionMenu(
self,
Expand All @@ -428,7 +439,7 @@ def __init__(self, *args, **kwargs):
values=[str(x) for x in list(range(1, min(16, multiprocessing.cpu_count()) + 1))],
default_value=str(min(16, multiprocessing.cpu_count())),
)
self.processes.grid(row=6, column=0, pady=(0, 10), sticky="nsew")
self.processes.grid(row=7, column=0, pady=(0, 10), sticky="nsew")

self.file_prefix = widgets.LabeledFileSelect(
self,
Expand All @@ -441,7 +452,7 @@ def __init__(self, *args, **kwargs):
),
wraplength=CONFIG_WIDTH - 20,
)
self.file_prefix.grid(row=7, column=0, columnspan=2, sticky="nsew")
self.file_prefix.grid(row=8, column=0, columnspan=2, sticky="nsew")

self.config_file = widgets.LabeledFileSelect(
self,
Expand All @@ -453,13 +464,14 @@ def __init__(self, *args, **kwargs):
),
wraplength=CONFIG_WIDTH - 20,
)
self.config_file.grid(row=8, column=0, columnspan=2, sticky="nsew")
self.config_file.grid(row=9, column=0, columnspan=2, sticky="nsew")

def get(self) -> Dict:
"""Get the configured values as a dictionary."""
return {
"lower_score_is_better": bool(int(self.lower_score.get())), # str repr of 0 or 1
"rename_to_usi": self.usi.get(),
"write_flashlfq": self.write_flashlfq.get(),
"write_report": self.generate_report.get(),
"id_decoy_pattern": self.id_decoy_pattern.get(),
"psm_id_pattern": self.psm_id_pattern.get(),
Expand Down Expand Up @@ -732,12 +744,6 @@ def __init__(self, *args, **kwargs):
self.write_txt.grid(row=row_n, column=0, pady=(0, 10), sticky="nsew")
row_n += 1

self.write_flashlfq = widgets.LabeledSwitch(
self, label="Write file for FlashLFQ", default=False
)
self.write_flashlfq.grid(row=row_n, column=0, pady=(0, 10), sticky="nsew")
row_n += 1

self.fasta_file = widgets.LabeledFileSelect(
self,
label="Select FASTA file (optional, required for protein inference)",
Expand All @@ -760,7 +766,6 @@ def get(self) -> Dict:
config = {
"write_weights": self.write_weights.get(),
"write_txt": self.write_txt.get(),
"write_flashlfq": self.write_flashlfq.get(),
"fasta_file": self.fasta_file.get(),
"protein_kwargs": self._parse_protein_kwargs(self.protein_kwargs.get()),
}
Expand Down
4 changes: 2 additions & 2 deletions ms2rescore/package_data/config_default.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
"mokapot": {
"train_fdr": 0.01,
"write_weights": true,
"write_txt": true,
"write_flashlfq": true
"write_txt": true
}
},
"config_file": null,
Expand All @@ -40,6 +39,7 @@
"processes": -1,
"rename_to_usi": false,
"fasta_file": null,
"write_flashlfq": false,
"write_report": false
}
}
3 changes: 1 addition & 2 deletions ms2rescore/package_data/config_default_tims.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
"rescoring_engine": {
"mokapot": {
"write_weights": true,
"write_txt": true,
"write_flashlfq": true
"write_txt": true
}
},
"psm_file": null
Expand Down
10 changes: 5 additions & 5 deletions ms2rescore/package_data/config_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@
"description": "Path to FASTA file with protein sequences to use for protein inference",
"oneOf": [{ "type": "string" }, { "type": "null" }]
},
"write_flashlfq": {
"description": "Write results to a FlashLFQ-compatible file",
"type": "boolean",
"default": false
},
"write_report": {
"description": "Write an HTML report with various QC metrics and charts",
"type": "boolean",
Expand Down Expand Up @@ -295,11 +300,6 @@
"description": "Write Mokapot results to a text file",
"type": "boolean",
"default": false
},
"write_flashlfq": {
"description": "Write Mokapot results to a FlashLFQ-compatible file",
"type": "boolean",
"default": false
}
}
},
Expand Down
21 changes: 8 additions & 13 deletions ms2rescore/rescoring_engines/mokapot.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def rescore(
train_fdr: float = 0.01,
write_weights: bool = False,
write_txt: bool = False,
write_flashlfq: bool = False,
protein_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
Expand All @@ -57,8 +56,7 @@ def rescore(
:py:class:`~mokapot.dataset.LinearPsmDataset`, and then optionally adds protein information
from a FASTA file. The dataset is then passed to the :py:func:`~mokapot.brew` function, which
returns the new scores, q-values, and PEPs. These are then written back to the original
:py:class:`~psm_utils.psm_list.PSMList`. Optionally, results can be written to a Mokapot text
file, a FlashLFQ-compatible file, or the model weights can be saved.
:py:class:`~psm_utils.psm_list.PSMList`.

Parameters
----------
Expand All @@ -75,8 +73,6 @@ def rescore(
Write model weights to a text file. Defaults to ``False``.
write_txt
Write Mokapot results to a text file. Defaults to ``False``.
write_flashlfq
Write Mokapot results to a FlashLFQ-compatible file. Defaults to ``False``.
protein_kwargs
Keyword arguments to pass to the :py:meth:`~mokapot.dataset.LinearPsmDataset.add_proteins`
method.
Expand All @@ -86,6 +82,13 @@ def rescore(
"""
_set_log_levels()

if "write_flashlfq" in kwargs:
_ = kwargs.pop("write_flashlfq")
logger.warning(
"The `write_flashlfq` argument has moved. To write FlashLFQ generic TSV, use the "
"MS²Rescore-level `write_flashlfq` option instead."
)

# Convert PSMList to Mokapot dataset
lin_psm_data = convert_psm_list(psm_list)
feature_names = list(lin_psm_data.features.columns)
Expand Down Expand Up @@ -119,10 +122,6 @@ def rescore(
)
if write_txt:
confidence_results.to_txt(file_root=output_file_root, decoys=True)
if write_flashlfq:
# TODO: How do we validate that the RTs are in minutes?
confidence_results.psms["retention_time"] = confidence_results.psms["retention_time"] * 60
confidence_results.to_flashlfq(output_file_root + ".mokapot.flashlfq.txt")


def convert_psm_list(
Expand Down Expand Up @@ -167,10 +166,6 @@ def convert_psm_list(
feature_df.columns = [f"feature:{f}" for f in feature_df.columns]
combined_df = pd.concat([psm_df[required_columns], feature_df], axis=1)

# Ensure filename for FlashLFQ txt output
if not combined_df["run"].notnull().all():
combined_df["run"] = "na"

feature_names = [f"feature:{f}" for f in feature_names] if feature_names else None

lin_psm_data = LinearPsmDataset(
Expand Down
Loading