Skip to content

Commit

Permalink
refactor: new parameter to filter empty isa table rows.
Browse files Browse the repository at this point in the history
  • Loading branch information
oyurekten committed Sep 23, 2024
1 parent 2c16d3c commit d6aa664
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 1 deletion.
35 changes: 34 additions & 1 deletion metabolights_utils/isatab/default/parser/isa_table_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import pathlib
import re
from functools import partial
from io import IOBase
Expand All @@ -20,19 +21,49 @@
from metabolights_utils.utils.hash_utils import MetabolightsHashUtils as HashUtils


def fix_empty_rows(
    file_path: str,
    messages: List[ParserMessage],
    read_encoding: str,
    write_encoding: str,
):
    """Remove blank rows from a text file, rewriting it in place if any exist.

    A row counts as blank when it is empty or contains only whitespace.
    Every surviving row is re-terminated with a single ``"\\n"``. The file is
    rewritten, and a warning is recorded, only when the number of surviving
    rows differs from the number of rows read.

    Args:
        file_path: Path of the file to inspect and, if needed, overwrite.
        messages: List that receives a WARNING ``ParserMessage`` when blank
            rows were found. It is mutated in place.
        read_encoding: Text encoding used to read the file.
        write_encoding: Text encoding used when the file is written back.
    """
    with open(file_path, "r", encoding=read_encoding) as source:
        raw_rows = source.readlines()

    kept_rows = []
    for raw_row in raw_rows:
        # Whitespace-only rows (including an empty string) are discarded.
        if not raw_row or not raw_row.strip():
            continue
        content = raw_row.strip("\n").strip("\r")
        kept_rows.append(f"{content}\n")

    # Same row count means nothing was dropped: leave the file untouched.
    if len(kept_rows) == len(raw_rows):
        return

    file_name = os.path.basename(file_path)
    warning = ParserMessage(
        type=ParserMessageType.WARNING,
        short=f"Empty rows are in file: {file_name}",
        detail=f"Empty rows are removed from {file_name}",
    )
    messages.append(warning)

    with open(file_path, "w", encoding=write_encoding) as target:
        target.writelines(kept_rows)


def parse_isa_file_content(
parser: Callable,
file_path: str,
messages: List[ParserMessage],
fix_unicode_exceptions: bool = False,
remove_empty_rows: bool = False,
) -> Tuple[IsaTableFile, List[ParserMessage]]:
try:
if remove_empty_rows:
fix_empty_rows(file_path, messages, "utf-8", "utf-8")
with open(file_path, "r", encoding="utf-8") as f:
model = parser(f, messages=messages)
return model, messages
except UnicodeDecodeError as ex:
if fix_unicode_exceptions:
try:
if remove_empty_rows:
fix_empty_rows(file_path, messages, "latin-1", "latin-1")
with open(file_path, "r", encoding="latin-1") as f:
model = parser(f, messages=messages)
message = ParserMessage(
Expand Down Expand Up @@ -74,6 +105,7 @@ def parse_isa_table_sheet_from_fs(
filter_options: List[TsvFileFilterOption] = None,
sort_options: List[TsvFileSortOption] = None,
fix_unicode_exceptions: bool = False,
remove_empty_rows: bool = False,
) -> Tuple[IsaTableFile, List[ParserMessage]]:
basename = os.path.basename(file_path)
dirname = os.path.basename(os.path.dirname(file_path))
Expand Down Expand Up @@ -121,12 +153,13 @@ def parse_isa_table_sheet_from_fs(
file_path=file_path,
messages=read_messages,
fix_unicode_exceptions=fix_unicode_exceptions,
remove_empty_rows=remove_empty_rows,
)
isa_table_file: IsaTableFile = table
if isa_table_file:
if os.path.exists(file_path):
isa_table_file.sha256_hash = HashUtils.sha256sum(file_path)
if isa_table_file.table.columns:
if isa_table_file.table:
messages.extend(read_messages)
messages = [x for x in messages if x.type != ParserMessageType.INFO]
return isa_table_file, messages
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os
import pathlib
import shutil
from typing import List

from metabolights_utils.isatab import Writer
from metabolights_utils.isatab.default.assay_file import DefaultAssayFileReader
from metabolights_utils.isatab.default.assignment_file import (
DefaultAssignmentFileReader,
Expand Down Expand Up @@ -39,6 +41,31 @@ def test_parse_isa_table_sheet_from_fs_valid_sample_01():
assert not messages


def test_parse_isa_table_sheet_from_fs_valid_sample_02():
    """Parse a copied sample sheet with ``remove_empty_rows=True`` and save it.

    The fixture is copied into a scratch directory and the copy is parsed, so
    the file under ``tests/test-data`` is never the one handed to the parser.
    """
    fixture = pathlib.Path("tests/test-data/MTBLS9999998/s_MTBLS9999998.txt")
    scratch_dir = pathlib.Path("test-temp/MTBLS9999998")
    working_copy = scratch_dir / pathlib.Path("s_MTBLS9999998.txt")

    os.makedirs(str(scratch_dir), exist_ok=True)
    shutil.copy(fixture, working_copy)

    sample_reader = DefaultSampleFileReader(results_per_page=50)
    expected = sample_reader.get_expected_patterns()
    assert expected

    parsed, parser_messages = parse_isa_table_sheet_from_fs(
        working_copy,
        expected_patterns=expected,
        remove_empty_rows=True,
    )
    assert parsed
    # Expected to be non-empty; presumably this is the empty-rows warning
    # (confirm against the fixture's contents).
    assert parser_messages

    # Round-trip: the parsed table must be writable back to the same path
    # using the hash reported by the parser.
    sample_writer = Writer.get_sample_file_writer()
    save_result = sample_writer.save_isa_table(
        file_path=str(working_copy),
        file_sha256_hash=parsed.sha256_hash,
        isa_table=parsed.table,
    )
    assert save_result.success


def test_parse_isa_table_sheet_from_fs_invalid_columns_01():
file_path = pathlib.Path(
"tests/test-data/isa-table-files/s_MTBLS1_invalid_columns.txt"
Expand Down

0 comments on commit d6aa664

Please sign in to comment.