diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 8dd399a..0000000 --- a/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 88 -extend-ignore = E203 diff --git a/.isort.cfg b/.isort.cfg deleted file mode 100644 index b9fb3f3..0000000 --- a/.isort.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[settings] -profile=black diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ca4f9bd..af0e053 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,29 +15,19 @@ repos: - id: end-of-file-fixer - id: requirements-txt-fixer - id: trailing-whitespace - - repo: https://github.com/PyCQA/isort - rev: "5.12.0" + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.282 hooks: - - id: isort - - repo: https://github.com/PyCQA/autoflake - rev: v2.0.1 - hooks: - - id: autoflake - args: [--in-place, --remove-all-unused-imports] + - id: ruff + args: [--fix, --exit-non-zero-on-fix] - repo: https://github.com/psf/black - rev: '23.1.0' + rev: '23.7.0' hooks: - id: black - - repo: https://github.com/PyCQA/flake8 - rev: "6.0.0" - hooks: - - id: flake8 - additional_dependencies: [flake8-typing-imports] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.0.0 + rev: v1.4.1 hooks: - id: mypy additional_dependencies: [types-requests, types-PyYAML] ci: autoupdate_branch: develop - skip: [flake8, mypy] diff --git a/CHANGELOG.md b/CHANGELOG.md index 055fe21..b279f5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [0.7.0] - 2023-08-11
+
+Implemented checks for overlapping raw data blocks in MCD file metadata [#6](https://github.com/BodenmillerGroup/readimc/issues/6)
+
+Implemented lenient extraction of corrupted imaging data [#19](https://github.com/BodenmillerGroup/readimc/pull/19)
+
 ## [0.6.2] - 2023-01-31
 
 Maintenance release
@@ -97,7 +103,8 @@ Minor documentation changes
 ## [0.1.0] - 2021-10-09
 
 Initial release
 
+[0.7.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.2...v0.7.0
 [0.6.2]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.1...v0.6.2
 [0.6.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.0...v0.6.1
 [0.6.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.5.0...v0.6.0
diff --git a/pyproject.toml b/pyproject.toml
index dfee5e0..e493034 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,5 +2,12 @@
 requires = ["setuptools>=64", "wheel", "setuptools_scm[toml]>=6.2"]
 build-backend = "setuptools.build_meta"
 
+[tool.pytest.ini_options]
+addopts = ["--cov=readimc", "--cov-report=xml:coverage.xml"]
+testpaths = ["tests"]
+
+[tool.ruff]
+select = ["E", "F", "I"]
+
 [tool.setuptools_scm]
 write_to = "readimc/_version.py"
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 1261316..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-[pytest]
-addopts = --cov=readimc --cov-report xml:coverage.xml
-testpaths =
-    tests
diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py
index 0e13e87..d49ed0b 100644
--- a/readimc/mcd_file.py
+++ b/readimc/mcd_file.py
@@ -90,10 +90,13 @@ def close(self) -> None:
             self._fh.close()
             self._fh = None
 
-    def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndarray:
+    def read_acquisition(
+        self, acquisition: Optional[Acquisition] = None, strict: bool = True
+    ) -> np.ndarray:
         """Reads IMC acquisition data as numpy array.
:param acquisition: the acquisition to read + :param strict: set this parameter to False to try to recover corrupted data :return: the acquisition data as 32-bit floating point array, shape: (c, y, x) """ @@ -123,7 +126,12 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndar if data_size % bytes_per_pixel != 0: data_size += 1 if data_size % bytes_per_pixel != 0: - raise IOError( + if strict: + raise IOError( + f"MCD file '{self.path.name}' corrupted: " + "invalid acquisition image data size" + ) + warn( f"MCD file '{self.path.name}' corrupted: " "invalid acquisition image data size" ) @@ -136,15 +144,35 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndar offset=data_start_offset, shape=(num_pixels, num_channels + 3), ) - width, height = np.amax(data[:, :2], axis=0).astype(int) + 1 + xs = data[:, 0].astype(int) + ys = data[:, 1].astype(int) + try: + width = int(acquisition.metadata["MaxX"]) + height = int(acquisition.metadata["MaxY"]) + if width <= np.amax(xs) or height <= np.amax(ys): + raise ValueError( + "data shape is incompatible with acquisition image dimensions" + ) + except (KeyError, ValueError): + warn( + f"MCD file '{self.path.name}' corrupted: " + "cannot read acquisition image dimensions; recovering from data shape" + ) + width = np.amax(xs) + 1 + height = np.amax(ys) + 1 if width * height != data.shape[0]: - raise IOError( + if strict: + raise IOError( + f"MCD file '{self.path.name}' corrupted: " + "inconsistent acquisition image data size" + ) + warn( f"MCD file '{self.path.name}' corrupted: " "inconsistent acquisition image data size" ) - img = np.zeros((height, width, num_channels), dtype=np.float32) - img[data[:, 1].astype(int), data[:, 0].astype(int), :] = data[:, 3:] - return np.moveaxis(img, -1, 0) + img = np.zeros((num_channels, height, width), dtype=np.float32) + img[:, ys, xs] = np.transpose(data[:, 3:]) + return img def read_slide(self, slide: Slide) -> Optional[np.ndarray]: 
"""Reads and decodes a slide image as numpy array using the ``imageio`` diff --git a/readimc/mcd_parser.py b/readimc/mcd_parser.py index da4ed41..248b273 100644 --- a/readimc/mcd_parser.py +++ b/readimc/mcd_parser.py @@ -1,3 +1,4 @@ +import itertools import re from typing import Dict, List, Optional, Tuple from warnings import warn @@ -121,6 +122,16 @@ def _parse_slide(self, slide_elem: ET.Element) -> Slide: slide.acquisitions.append(acquisition) if panorama is not None: panorama.acquisitions.append(acquisition) + for a, b in itertools.combinations(slide.acquisitions, 2): + a_start = a.metadata["DataStartOffset"] + a_end = a.metadata["DataEndOffset"] + b_start = b.metadata["DataStartOffset"] + b_end = b.metadata["DataEndOffset"] + if b_start <= a_start < b_end or b_start < a_end <= b_end: + warn( + f"Slide {slide.id} corrupted: " + f"overlapping memory blocks for acquisitions {a.id} and {b.id}" + ) slide.panoramas.sort(key=lambda panorama: panorama.id) slide.acquisitions.sort(key=lambda acquisition: acquisition.id) return slide diff --git a/readimc/txt_file.py b/readimc/txt_file.py index 4f92926..b8e7bd6 100644 --- a/readimc/txt_file.py +++ b/readimc/txt_file.py @@ -1,6 +1,7 @@ import re from os import PathLike from typing import List, Optional, Sequence, TextIO, Tuple, Union +from warnings import warn import numpy as np import pandas as pd @@ -93,11 +94,14 @@ def close(self) -> None: self._fh.close() self._fh = None - def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndarray: + def read_acquisition( + self, acquisition: Optional[Acquisition] = None, strict: bool = True + ) -> np.ndarray: """Reads IMC acquisition data as numpy array. 
:param acquisition: the acquisition to read (for compatibility with ``IMCFile`` and ``MCDFile``; unused) + :param strict: set this parameter to False to try to recover corrupted data :return: the acquisition data as 32-bit floating point array, shape: (c, y, x) """ @@ -121,7 +125,12 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndar ) width, height = df[["X", "Y"]].add(1).max(axis=0).astype(int) if width * height != len(df.index): - raise IOError( + if strict: + raise IOError( + f"TXT file '{self.path.name}' corrupted: " + "inconsistent acquisition image data size" + ) + warn( f"TXT file '{self.path.name}' corrupted: " "inconsistent acquisition image data size" ) diff --git a/requirements_devel.txt b/requirements_devel.txt index fb7a1bb..4abdba6 100644 --- a/requirements_devel.txt +++ b/requirements_devel.txt @@ -1,5 +1,4 @@ black -flake8 -isort mypy pre-commit +ruff diff --git a/setup.cfg b/setup.cfg index 16d411f..2776a70 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,3 +28,7 @@ install_requires = pandas python_requires = >=3.8 packages = find: + +[flake8] +max-line-length = 88 +extend-ignore = E203