Skip to content

Commit

Permalink
Merge pull request #2 from jaspervdh/main
Browse files Browse the repository at this point in the history
parse tsv or csv file and write to Peptidoforms
  • Loading branch information
jaspervdh authored Sep 2, 2024
2 parents 0969c35 + ddbe1d1 commit 9e648a2
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 1 deletion.
41 changes: 41 additions & 0 deletions mumble/mumble.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,47 @@ def write_modified_psm_list(self, psm_list, output_file=None, psm_file_type="tsv
logger.info(f"Writing modified PSM list to {output_file}")
write_file(psm_list=psm_list, filename=output_file, filetype=psm_file_type)

def parse_csv_file(self, file_name: str, delimiter: str = "\t") -> list:
    """
    Read a delimited text file (TSV or CSV) and build one PSM per data row.

    The file must have a header row containing at least the columns
    ``peptidoform``, ``spectrum_id`` and ``precursor_mz``. Leading and trailing
    whitespace is stripped from the column names and from every string cell
    before the PSM objects are created.

    Args:
        file_name (str): Path to the CSV or TSV file.
        delimiter (str, optional): Delimiter used in the file. Defaults to a tab.

    Returns:
        list: One PSM per data row, in file order. An empty list is returned
        (and the problem is logged) when the file is not found, contains no
        data, cannot be parsed, or lacks one of the required columns.
    """
    try:
        df = pd.read_csv(file_name, delimiter=delimiter)
    except FileNotFoundError as e:
        logger.error(f"File not found: {e}")
        return []
    except pd.errors.EmptyDataError as e:
        logger.error(f"Empty data: {e}")
        return []
    except pd.errors.ParserError as e:
        logger.error(f"Parsing error: {e}")
        return []

    def _strip(value):
        # Only strings carry stray whitespace; leave numbers/NaN untouched.
        return value.strip() if isinstance(value, str) else value

    # Normalise the header first so that e.g. " spectrum_id" is still recognised.
    df.columns = [_strip(column) for column in df.columns]

    required_columns = {"peptidoform", "spectrum_id", "precursor_mz"}
    missing = required_columns - set(df.columns)
    if missing:
        logger.error(f"Missing required columns: {missing}")
        return []

    # Clean up any whitespace in the cells. DataFrame.map was added in
    # pandas 2.1; older releases only provide the equivalent applymap.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    df = elementwise(df, _strip)

    # Walk the three needed columns in lockstep instead of materialising a
    # Series for every row via iterrows().
    return [
        PSM(peptidoform=peptidoform, spectrum_id=spectrum_id, precursor_mz=precursor_mz)
        for peptidoform, spectrum_id, precursor_mz in zip(
            df["peptidoform"], df["spectrum_id"], df["precursor_mz"]
        )
    ]



class _ModificationHandler:
"""Class that handles modifications."""
Expand Down
59 changes: 58 additions & 1 deletion tests/test_mumble.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pytest
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch, mock_open
import pandas as pd
from io import StringIO
from collections import namedtuple
from psm_utils import PSMList, PSM, Peptidoform
from pyteomics import proforma
Expand Down Expand Up @@ -99,6 +101,61 @@ def test_add_modified_psms(self, setup_psmhandler):
assert isinstance(new_psm_list, PSMList)
assert len(new_psm_list) > 1

def test_parse_csv_file_valid(self, setup_psmhandler):
    """A well-formed tab-separated input yields one parsed entry per data row."""
    # The fixture is indexable; its first element is the handler under test.
    handler = setup_psmhandler[0]

    # Tab-separated content: a header line followed by two data rows.
    tsv_text = (
        "peptidoform\tspectrum_id\tprecursor_mz\n"
        "ART[Deoxy]HR/2\tspec1\t214.1\n"
        "ABCD/2\tspec2\t300.2\n"
    )

    # Build the frame that the patched pandas.read_csv will hand back.
    stub_frame = pd.read_csv(StringIO(tsv_text), delimiter="\t")

    with patch("builtins.open", mock_open(read_data=tsv_text)):
        with patch("pandas.read_csv", return_value=stub_frame):
            parsed = handler.parse_csv_file("dummy_file.tsv")

    assert len(parsed) == 2

    first = parsed[0]
    assert first.peptidoform == Peptidoform("ART[Deoxy]HR/2")
    assert first.spectrum_id == "spec1"
    assert first.precursor_mz == 214.1

def test_parse_csv_file_missing_columns(self, setup_psmhandler):
    """Input lacking the 'precursor_mz' column must produce an empty list."""
    handler = setup_psmhandler[0]

    # Header and rows deliberately omit the 'precursor_mz' column.
    tsv_text = (
        "peptidoform\tspectrum_id\n"
        "ART[Deoxy]HR\tspec1\n"
        "ABCD\tspec2\n"
    )

    # Build the frame that the patched pandas.read_csv will hand back.
    stub_frame = pd.read_csv(StringIO(tsv_text), delimiter="\t")

    with patch("builtins.open", mock_open(read_data=tsv_text)):
        with patch("pandas.read_csv", return_value=stub_frame):
            parsed = handler.parse_csv_file("dummy_file.tsv", delimiter="\t")

    # A required column is absent, so nothing should be returned.
    assert parsed == []

def test_parse_csv_file_file_not_found(self, setup_psmhandler):
    """With builtins.open raising FileNotFoundError, an empty list is expected."""
    handler = setup_psmhandler[0]

    # Every call to open() inside the block raises FileNotFoundError.
    failing_open = patch("builtins.open", side_effect=FileNotFoundError)
    with failing_open:
        parsed = handler.parse_csv_file("non_existent_file.tsv", delimiter="\t")

    assert parsed == []

def test_parse_csv_file_empty_file(self, setup_psmhandler):
    """Input that has a header line but no data rows must produce an empty list."""
    handler = setup_psmhandler[0]

    # Header only: all three columns are named, but there are no rows.
    header_only = "peptidoform\tspectrum_id\tprecursor_mz"

    # Build the (row-less) frame that the patched pandas.read_csv will hand back.
    stub_frame = pd.read_csv(StringIO(header_only), delimiter="\t")

    with patch("builtins.open", mock_open(read_data=header_only)):
        with patch("pandas.read_csv", return_value=stub_frame):
            parsed = handler.parse_csv_file("dummy_file.tsv", delimiter="\t")

    # No data rows means nothing to build.
    assert parsed == []


class TestModificationHandler:

Expand Down

0 comments on commit 9e648a2

Please sign in to comment.