Merge pull request #21 from BodenmillerGroup/develop

Develop
BodenmillerGroup · Nov 23, 2023 · c36bd1d · c36bd1d
2 parents e39e944 + 8eef98d
commit c36bd1d
Show file tree

Hide file tree

Showing 11 changed files with 82 additions and 37 deletions.
diff --git a/.flake8 b/.flake8
diff --git a/.isort.cfg b/.isort.cfg
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -15,29 +15,19 @@ repos:
       - id: end-of-file-fixer
       - id: requirements-txt-fixer
       - id: trailing-whitespace
-  - repo: https://github.com/PyCQA/isort
-    rev: "5.12.0"
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.0.282
     hooks:
-      - id: isort
-  - repo: https://github.com/PyCQA/autoflake
-    rev: v2.0.1
-    hooks:
-      - id: autoflake
-        args: [--in-place, --remove-all-unused-imports]
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix]
   - repo: https://github.com/psf/black
-    rev: '23.1.0'
+    rev: '23.7.0'
     hooks:
       - id: black
-  - repo: https://github.com/PyCQA/flake8
-    rev: "6.0.0"
-    hooks:
-      - id: flake8
-        additional_dependencies: [flake8-typing-imports]
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.0.0
+    rev: v1.4.1
     hooks:
       - id: mypy
         additional_dependencies: [types-requests, types-PyYAML]
 ci:
   autoupdate_branch: develop
-  skip: [flake8, mypy]
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.7.0] - 2023-08-11
+
+Implemented checks for overlapping raw data blocks in MCD file metadata [#6](https://github.com/BodenmillerGroup/readimc/issues/6)
+
+Implemented lenient extraction of corrupted imaging data [#19](https://github.com/BodenmillerGroup/readimc/pull/19)
+
 ## [0.6.2] - 2023-01-31
 
 Maintenance release
@@ -97,7 +103,7 @@ Minor documentation changes
 ## [0.1.0] - 2021-10-09
 
 Initial release
-
+[0.7.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.2...v0.7.0
 [0.6.2]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.1...v0.6.2
 [0.6.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.0...v0.6.1
 [0.6.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.5.0...v0.6.0

diff --git a/pyproject.toml b/pyproject.toml
@@ -2,5 +2,12 @@
 requires = ["setuptools>=64", "wheel", "setuptools_scm[toml]>=6.2"]
 build-backend = "setuptools.build_meta"
 
+[tool.pytest.ini_options]
+addopts = ["--cov=readimc", "--cov-report=xml:coverage.xml"]
+testpaths = ["tests"]
+
+[tool.ruff]
+select = ["E", "F", "I"]
+
 [tool.setuptools_scm]
 write_to = "readimc/_version.py"
diff --git a/pytest.ini b/pytest.ini
diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py
@@ -90,10 +90,13 @@ def close(self) -> None:
             self._fh.close()
             self._fh = None
 
-    def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndarray:
+    def read_acquisition(
+        self, acquisition: Optional[Acquisition] = None, strict: bool = True
+    ) -> np.ndarray:
         """Reads IMC acquisition data as numpy array.
 
         :param acquisition: the acquisition to read
+        :param strict: set this parameter to False to try to recover corrupted data
         :return: the acquisition data as 32-bit floating point array,
             shape: (c, y, x)
         """
@@ -123,7 +126,12 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndar
         if data_size % bytes_per_pixel != 0:
             data_size += 1
         if data_size % bytes_per_pixel != 0:
-            raise IOError(
+            if strict:
+                raise IOError(
+                    f"MCD file '{self.path.name}' corrupted: "
+                    "invalid acquisition image data size"
+                )
+            warn(
                 f"MCD file '{self.path.name}' corrupted: "
                 "invalid acquisition image data size"
             )
@@ -136,15 +144,35 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndar
             offset=data_start_offset,
             shape=(num_pixels, num_channels + 3),
         )
-        width, height = np.amax(data[:, :2], axis=0).astype(int) + 1
+        xs = data[:, 0].astype(int)
+        ys = data[:, 1].astype(int)
+        try:
+            width = int(acquisition.metadata["MaxX"])
+            height = int(acquisition.metadata["MaxY"])
+            if width <= np.amax(xs) or height <= np.amax(ys):
+                raise ValueError(
+                    "data shape is incompatible with acquisition image dimensions"
+                )
+        except (KeyError, ValueError):
+            warn(
+                f"MCD file '{self.path.name}' corrupted: "
+                "cannot read acquisition image dimensions; recovering from data shape"
+            )
+            width = np.amax(xs) + 1
+            height = np.amax(ys) + 1
         if width * height != data.shape[0]:
-            raise IOError(
+            if strict:
+                raise IOError(
+                    f"MCD file '{self.path.name}' corrupted: "
+                    "inconsistent acquisition image data size"
+                )
+            warn(
                 f"MCD file '{self.path.name}' corrupted: "
                 "inconsistent acquisition image data size"
             )
-        img = np.zeros((height, width, num_channels), dtype=np.float32)
-        img[data[:, 1].astype(int), data[:, 0].astype(int), :] = data[:, 3:]
-        return np.moveaxis(img, -1, 0)
+        img = np.zeros((num_channels, height, width), dtype=np.float32)
+        img[:, ys, xs] = np.transpose(data[:, 3:])
+        return img
 
     def read_slide(self, slide: Slide) -> Optional[np.ndarray]:
         """Reads and decodes a slide image as numpy array using the ``imageio``

diff --git a/readimc/mcd_parser.py b/readimc/mcd_parser.py
@@ -1,3 +1,4 @@
+import itertools
 import re
 from typing import Dict, List, Optional, Tuple
 from warnings import warn
@@ -121,6 +122,16 @@ def _parse_slide(self, slide_elem: ET.Element) -> Slide:
                     slide.acquisitions.append(acquisition)
                     if panorama is not None:
                         panorama.acquisitions.append(acquisition)
+        for a, b in itertools.combinations(slide.acquisitions, 2):
+            a_start = a.metadata["DataStartOffset"]
+            a_end = a.metadata["DataEndOffset"]
+            b_start = b.metadata["DataStartOffset"]
+            b_end = b.metadata["DataEndOffset"]
+            if b_start <= a_start < b_end or b_start < a_end <= b_end:
+                warn(
+                    f"Slide {slide.id} corrupted: "
+                    f"overlapping memory blocks for acquisitions {a.id} and {b.id}"
+                )
         slide.panoramas.sort(key=lambda panorama: panorama.id)
         slide.acquisitions.sort(key=lambda acquisition: acquisition.id)
         return slide

diff --git a/readimc/txt_file.py b/readimc/txt_file.py
@@ -1,6 +1,7 @@
 import re
 from os import PathLike
 from typing import List, Optional, Sequence, TextIO, Tuple, Union
+from warnings import warn
 
 import numpy as np
 import pandas as pd
@@ -93,11 +94,14 @@ def close(self) -> None:
             self._fh.close()
             self._fh = None
 
-    def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndarray:
+    def read_acquisition(
+        self, acquisition: Optional[Acquisition] = None, strict: bool = True
+    ) -> np.ndarray:
         """Reads IMC acquisition data as numpy array.
 
         :param acquisition: the acquisition to read (for compatibility with ``IMCFile``
             and ``MCDFile``; unused)
+        :param strict: set this parameter to False to try to recover corrupted data
         :return: the acquisition data as 32-bit floating point array,
             shape: (c, y, x)
         """
@@ -121,7 +125,12 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndar
             )
         width, height = df[["X", "Y"]].add(1).max(axis=0).astype(int)
         if width * height != len(df.index):
-            raise IOError(
+            if strict:
+                raise IOError(
+                    f"TXT file '{self.path.name}' corrupted: "
+                    "inconsistent acquisition image data size"
+                )
+            warn(
                 f"TXT file '{self.path.name}' corrupted: "
                 "inconsistent acquisition image data size"
             )

diff --git a/requirements_devel.txt b/requirements_devel.txt
@@ -1,5 +1,4 @@
 black
-flake8
-isort
 mypy
 pre-commit
+ruff
diff --git a/setup.cfg b/setup.cfg
@@ -28,3 +28,7 @@ install_requires =
     pandas
 python_requires = >=3.8
 packages = find:
+
+[flake8]
+max-line-length = 88
+extend-ignore = E203