Skip to content

Commit

Permalink
Merge pull request #226 from paretje/feature/gzip
Browse files Browse the repository at this point in the history
support reading gzipped spectrum files
  • Loading branch information
RalfG authored Nov 1, 2024
2 parents d2a4281 + 14e4330 commit d87872f
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 17 deletions.
2 changes: 1 addition & 1 deletion docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ For instance:

.. code-block:: sh
ms2pip correlate results.sage.tsv --spectrum-file spectra.mgf
ms2pip correlate --psm-filetype sage results.sage.tsv spectra.mgf
``get-training-data``
Expand Down
21 changes: 5 additions & 16 deletions ms2pip/spectrum_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ def read_spectrum_file(spectrum_file: str) -> Generator[ObservedSpectrum, None,
If the file extension is not supported.
"""
file_extension = Path(spectrum_file).suffix.lower()
if file_extension not in [".mgf", ".mzml", ".d"] and not _is_minitdf(spectrum_file):
raise UnsupportedSpectrumFiletypeError(file_extension)
try:
spectra = get_ms2_spectra(str(spectrum_file))
except ValueError:
raise UnsupportedSpectrumFiletypeError(Path(spectrum_file).suffixes)

for spectrum in get_ms2_spectra(str(spectrum_file)):
for spectrum in spectra:
obs_spectrum = ObservedSpectrum(
mz=np.array(spectrum.mz, dtype=np.float32),
intensity=np.array(spectrum.intensity, dtype=np.float32),
Expand All @@ -50,15 +51,3 @@ def read_spectrum_file(spectrum_file: str) -> Generator[ObservedSpectrum, None,
):
continue
yield obs_spectrum


def _is_minitdf(spectrum_file: str) -> bool:
"""
Check if the spectrum file is a Bruker miniTDF folder.
A Bruker miniTDF folder has no fixed name, but contains files matching the patterns
``*ms2spectrum.bin`` and ``*ms2spectrum.parquet``.
"""
files = set(Path(spectrum_file).glob("*ms2spectrum.bin"))
files.update(Path(spectrum_file).glob("*ms2spectrum.parquet"))
return len(files) >= 2

0 comments on commit d87872f

Please sign in to comment.