Skip to content

Commit

Permalink
Some docstrings for peak-picking utilities.
Browse files Browse the repository at this point in the history
  • Loading branch information
mcbrider5002 committed Feb 3, 2024
1 parent 88debbf commit d766810
Showing 1 changed file with 104 additions and 0 deletions.
104 changes: 104 additions & 0 deletions vimms/PeakPicking.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,20 @@ def report_boxes(cls, output_path):

@dataclass
class MZMineParams(AbstractParams):
"""
Wrapper class to run MZMine 2 peak-picking from the ViMMS codebase.
MZMine 2 allows commands for its processing pipeline to be stored in an .xml
file and then run from the command line using its "batch mode" executable.
Given an appropriate "template" .xml file, this class substitutes input and
output file names into it and then runs it in batch mode via subprocess.
NOTE: MZMine is not installed with ViMMS. It must be installed separately
and the path to the "batch mode" executable specified for this class.
Args:
mzmine_template: Path to .xml template giving batch commands.
mzmine_exe: Path to batch mode executable.
"""
method_name = "MZMine"

RT_FACTOR = 60 #minutes
Expand Down Expand Up @@ -78,6 +92,19 @@ def _make_batch_file(self, input_files, output_dir, output_name, output_path):
return new_xml

def pick_aligned_peaks(self, input_files, output_dir, output_name, force=False):
"""
Run MZMine batch mode file for a list of input files.
Args:
input_files: Iterable of paths to input files.
output_dir: Directory to write output to.
output_name: Name for output file. Some text and the file extension
are added automatically.
force: When False, don't run peak-picking if a file already exists
at the output destination.
Returns: Full path the output file was written to.
"""
input_files = list(set(input_files)) #filter duplicates
output_path = self.format_output_path(output_dir, output_name)

Expand All @@ -91,6 +118,22 @@ def pick_aligned_peaks(self, input_files, output_dir, output_name, force=False):

@staticmethod
def check_files_match(fullscan_names, aligned_path, mode="subset"):
"""
Check that the source files listed in the header of a peak-picking
output match an input list.
Args:
fullscan_names: List of .mzml files (or paths to them) to look
for in the header of the aligned file.
aligned_path: Full filepath to the aligned file.
mode: "subset" just checks if all fullscan_names can be found in
the header. "exact" checks whether or not the two sets of
names exactly match.
Returns: Tuple of boolean reporting whether test succeeded, the
names of the fullscans given as input, and the names of files
found in the header.
"""
fs_names = {os.path.basename(fs) for fs in fullscan_names}
mzmine_names = set()

Expand All @@ -116,6 +159,23 @@ def check_files_match(fullscan_names, aligned_path, mode="subset"):

@staticmethod
def read_aligned_csv(box_file_path):
"""
Parse in an aligned boxfile in MZMine 2 format. Each column
in an aligned boxfile either has properties related to the whole
row (e.g. average m/z of the peak aligned on that row) or a property
specific to an unaligned peak from a parent .mzML. Row
properties are parsed into a list of dictionaries (one dictionary
per row) in the form [{property_name: value}, ...]. .mzML properties
are loaded into a similar list but with a nested dictionary
i.e. [{mzml_name: {property_name: value}}, ...].
Args:
box_file_path: Full path to the aligned boxfile.
Returns: Tuple of .mzML names and iterable of pairs of row dicts
and .mzML dicts.
"""

row_headers = [
"row ID",
"row m/z",
Expand Down Expand Up @@ -156,13 +216,37 @@ def pick_aligned_peaks(input_files,
mzmine_template,
mzmine_exe,
force=False):
"""
Convenience function (for backwards compatibility) which picks
peaks using MZMineParams.
"""

params = MZMineParams(mzmine_template, mzmine_exe)
return params.pick_aligned_peaks(input_files, output_dir, output_name, force=force)


@dataclass
class XCMSScriptParams(AbstractParams):
"""
Wrapper class to run XCMS scripts written in R from ViMMS. The R script
is run via subprocess and is given all arguments specified in the object
instance as command-line arguments - the R script must handle any that
are not None. XCMS does not natively write out aligned peaks so methods
for reading output files assume they were written in the same format as
MZMineParams.
NOTE: R and XCMS are not installed with ViMMS. They must be installed
separately and the paths to both the Rscript utility and the XCMS
script to run must be specified for this class.
Args:
xcms_r_script: Path to the XCMS script written in R which should
be run.
rscript_exe: Path to the "Rscript" utility packaged with R. By
default assumes it can be found via the "Rscript" environment
variable.
others: See the XCMS documentation for details.
"""
#TODO: It would be good to just call the R functions from Python
#instead of calling an external R script...

Expand Down Expand Up @@ -191,6 +275,20 @@ class XCMSScriptParams(AbstractParams):
kNN: int = None

def pick_aligned_peaks(self, input_files, output_dir, output_name, force=False):
"""
Run XCMS script for a list of input files.
Args:
input_files: Iterable of paths to input files.
output_dir: Directory to write output to.
output_name: Name for output file. Some text and the file extension
are added automatically.
force: When False, don't run peak-picking if a file already exists
at the output destination.
Returns: Full path the output file was written to.
"""

input_files = list(set(input_files)) #filter duplicates
output_path = self.format_output_path(output_dir, output_name)

Expand All @@ -214,8 +312,14 @@ def pick_aligned_peaks(self, input_files, output_dir, output_name, force=False):

@staticmethod
def check_files_match(fullscan_names, aligned_path, mode="subset"):
"""
Wrapper to MZMineParams' "check_files_match".
Forwards "fullscan_names", "aligned_path" and "mode" unchanged and
returns whatever MZMineParams.check_files_match returns; see that
method for the meaning of the arguments and the return value.
"""
return MZMineParams.check_files_match(fullscan_names, aligned_path, mode=mode)

@staticmethod
def read_aligned_csv(box_file):
"""
Wrapper to MZMineParams' "read_aligned_csv".
Forwards "box_file" unchanged as the path of the aligned boxfile and
returns whatever MZMineParams.read_aligned_csv returns; see that
method for the expected file format and the return value.
"""
return MZMineParams.read_aligned_csv(box_file)

0 comments on commit d766810

Please sign in to comment.