From fa947a17eefc50b7de5c72cb44e25c848ce3b48b Mon Sep 17 00:00:00 2001 From: Milad Adibi Date: Mon, 20 Feb 2023 11:11:52 +0100 Subject: [PATCH 01/30] Closes issue #6, overlapping data blocks in mcd file. --- readimc/mcd_parser.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/readimc/mcd_parser.py b/readimc/mcd_parser.py index da4ed41..0591049 100644 --- a/readimc/mcd_parser.py +++ b/readimc/mcd_parser.py @@ -2,7 +2,7 @@ from typing import Dict, List, Optional, Tuple from warnings import warn from xml.etree import ElementTree as ET - +import itertools from .data import Acquisition, Panorama, Slide @@ -121,6 +121,20 @@ def _parse_slide(self, slide_elem: ET.Element) -> Slide: slide.acquisitions.append(acquisition) if panorama is not None: panorama.acquisitions.append(acquisition) + + #Check for possible overlap of memory blocks between qcquisitions: + acqDict={} + for acq in slide.acquisitions: + acqDict[slide.acquisitions.index(acq)]= [acq.metadata['DataStartOffset'], acq.metadata['DataEndOffset']] + # Only do minimum number of comparisons between the elemnts. + for a, b in itertools.combinations(acqDict.items(), 2): + if (b[1][0] <= a[1][0] and b[1][1] > a[1][0]) or (b[1][0] < a[1][1] and b[1][1] >= a[1][1]): + warn( + "The mcd file appears to be curropted. There are memory blocks that map to both acquisitions " +str(a[0]) + + " and "+ str(b[0]) + ". In an uncurropted file a given memory block should map to only one acquisition" + ) + + slide.panoramas.sort(key=lambda panorama: panorama.id) slide.acquisitions.sort(key=lambda acquisition: acquisition.id) return slide From ec855fb9ce6e71b94647235b9c42de2d96d31e6d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 20 Feb 2023 11:28:11 +0000 Subject: [PATCH 02/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- readimc/mcd_parser.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/readimc/mcd_parser.py b/readimc/mcd_parser.py index 0591049..2eff92e 100644 --- a/readimc/mcd_parser.py +++ b/readimc/mcd_parser.py @@ -1,8 +1,9 @@ +import itertools import re from typing import Dict, List, Optional, Tuple from warnings import warn from xml.etree import ElementTree as ET -import itertools + from .data import Acquisition, Panorama, Slide @@ -121,20 +122,27 @@ def _parse_slide(self, slide_elem: ET.Element) -> Slide: slide.acquisitions.append(acquisition) if panorama is not None: panorama.acquisitions.append(acquisition) - - #Check for possible overlap of memory blocks between qcquisitions: - acqDict={} + + # Check for possible overlap of memory blocks between qcquisitions: + acqDict = {} for acq in slide.acquisitions: - acqDict[slide.acquisitions.index(acq)]= [acq.metadata['DataStartOffset'], acq.metadata['DataEndOffset']] + acqDict[slide.acquisitions.index(acq)] = [ + acq.metadata["DataStartOffset"], + acq.metadata["DataEndOffset"], + ] # Only do minimum number of comparisons between the elemnts. for a, b in itertools.combinations(acqDict.items(), 2): - if (b[1][0] <= a[1][0] and b[1][1] > a[1][0]) or (b[1][0] < a[1][1] and b[1][1] >= a[1][1]): + if (b[1][0] <= a[1][0] and b[1][1] > a[1][0]) or ( + b[1][0] < a[1][1] and b[1][1] >= a[1][1] + ): warn( - "The mcd file appears to be curropted. There are memory blocks that map to both acquisitions " +str(a[0]) + - " and "+ str(b[0]) + ". In an uncurropted file a given memory block should map to only one acquisition" - ) - - + "The mcd file appears to be curropted. There are memory blocks that map to both acquisitions " + + str(a[0]) + + " and " + + str(b[0]) + + ". In an uncurropted file a given memory block should map to only one acquisition" + ) + slide.panoramas.sort(key=lambda panorama: panorama.id) slide.acquisitions.sort(key=lambda acquisition: acquisition.id) return slide From 666ababe61443a2cfae22c74f2d878156a14c282 Mon Sep 17 00:00:00 2001 From: Milad Adibi Date: Tue, 21 Feb 2023 10:46:25 +0100 Subject: [PATCH 03/30] fixing merge conflict --- readimc/mcd_parser.py | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/readimc/mcd_parser.py b/readimc/mcd_parser.py index 2eff92e..a7d74d9 100644 --- a/readimc/mcd_parser.py +++ b/readimc/mcd_parser.py @@ -123,26 +123,17 @@ def _parse_slide(self, slide_elem: ET.Element) -> Slide: if panorama is not None: panorama.acquisitions.append(acquisition) - # Check for possible overlap of memory blocks between qcquisitions: - acqDict = {} - for acq in slide.acquisitions: - acqDict[slide.acquisitions.index(acq)] = [ - acq.metadata["DataStartOffset"], - acq.metadata["DataEndOffset"], - ] - # Only do minimum number of comparisons between the elemnts. - for a, b in itertools.combinations(acqDict.items(), 2): - if (b[1][0] <= a[1][0] and b[1][1] > a[1][0]) or ( - b[1][0] < a[1][1] and b[1][1] >= a[1][1] - ): + # Check for possible overlap of memory blocks between qcquisitions: + for a, b in itertools.combinations(slide.acquisitions, 2): + a_start = a.metadata["DataStartOffset"] + a_end = a.metadata["DataEndOffset"] + b_start = b.metadata["DataStartOffset"] + b_end = b.metadata["DataEndOffset"] + #TODO change 'slide_elem[2].text' to select by element names for modularity + if (b_start <= a_start and b_end > a_start) or (b_start < a_end and b_end >= a_end): warn( - "The mcd file appears to be curropted. There are memory blocks that map to both acquisitions " - + str(a[0]) - + " and " - + str(b[0]) - + ". In an uncurropted file a given memory block should map to only one acquisition" - ) - + f" {slide_elem[2].text} appears to be curropted. There are memory blocks that map to both acquisitions {a.id} and {b.id}. In an uncurropted file a given memory block should map to only one acquisition" + ) slide.panoramas.sort(key=lambda panorama: panorama.id) slide.acquisitions.sort(key=lambda acquisition: acquisition.id) return slide From 404a58cf89e1d722138817062e190d94dd09c417 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Feb 2023 09:47:12 +0000 Subject: [PATCH 04/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- readimc/mcd_parser.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/readimc/mcd_parser.py b/readimc/mcd_parser.py index a7d74d9..06fc8f2 100644 --- a/readimc/mcd_parser.py +++ b/readimc/mcd_parser.py @@ -123,17 +123,19 @@ def _parse_slide(self, slide_elem: ET.Element) -> Slide: if panorama is not None: panorama.acquisitions.append(acquisition) - # Check for possible overlap of memory blocks between qcquisitions: + # Check for possible overlap of memory blocks between qcquisitions: for a, b in itertools.combinations(slide.acquisitions, 2): a_start = a.metadata["DataStartOffset"] a_end = a.metadata["DataEndOffset"] b_start = b.metadata["DataStartOffset"] b_end = b.metadata["DataEndOffset"] - #TODO change 'slide_elem[2].text' to select by element names for modularity - if (b_start <= a_start and b_end > a_start) or (b_start < a_end and b_end >= a_end): + # TODO change 'slide_elem[2].text' to select by element names for modularity + if (b_start <= a_start and b_end > a_start) or ( + b_start < a_end and b_end >= a_end + ): warn( f" {slide_elem[2].text} appears to be curropted. There are memory blocks that map to both acquisitions {a.id} and {b.id}. In an uncurropted file a given memory block should map to only one acquisition" - ) + ) slide.panoramas.sort(key=lambda panorama: panorama.id) slide.acquisitions.sort(key=lambda acquisition: acquisition.id) return slide From a0ab6e2048494a1d5122d7a76c3ebc07e019a2f9 Mon Sep 17 00:00:00 2001 From: Milad4849 <70942846+Milad4849@users.noreply.github.com> Date: Tue, 21 Feb 2023 16:42:41 +0100 Subject: [PATCH 05/30] Update readimc/mcd_parser.py Change request response; more concise syntax of the if statement. Co-authored-by: Jonas Windhager --- readimc/mcd_parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/readimc/mcd_parser.py b/readimc/mcd_parser.py index 06fc8f2..d4d501e 100644 --- a/readimc/mcd_parser.py +++ b/readimc/mcd_parser.py @@ -130,9 +130,7 @@ def _parse_slide(self, slide_elem: ET.Element) -> Slide: b_start = b.metadata["DataStartOffset"] b_end = b.metadata["DataEndOffset"] # TODO change 'slide_elem[2].text' to select by element names for modularity - if (b_start <= a_start and b_end > a_start) or ( - b_start < a_end and b_end >= a_end - ): + if b_start <= a_start < b_end or b_start < a_end <= b_end: warn( f" {slide_elem[2].text} appears to be curropted. There are memory blocks that map to both acquisitions {a.id} and {b.id}. In an uncurropted file a given memory block should map to only one acquisition" ) From 24fe5bf9497f9ff02046d339827167e33ed60cba Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Apr 2023 17:05:15 +0000 Subject: [PATCH 06/30] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/autoflake: v2.0.1 → v2.0.2](https://github.com/PyCQA/autoflake/compare/v2.0.1...v2.0.2) - [github.com/psf/black: 23.1.0 → 23.3.0](https://github.com/psf/black/compare/23.1.0...23.3.0) - [github.com/pre-commit/mirrors-mypy: v1.0.0 → v1.2.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.0.0...v1.2.0) --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ca4f9bd..f18d7c4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,12 +20,12 @@ repos: hooks: - id: isort - repo: https://github.com/PyCQA/autoflake - rev: v2.0.1 + rev: v2.0.2 hooks: - id: autoflake args: [--in-place, --remove-all-unused-imports] - repo: https://github.com/psf/black - rev: '23.1.0' + rev: '23.3.0' hooks: - id: black - repo: https://github.com/PyCQA/flake8 @@ -34,7 +34,7 @@ repos: - id: flake8 additional_dependencies: [flake8-typing-imports] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.0.0 + rev: v1.2.0 hooks: - id: mypy additional_dependencies: [types-requests, types-PyYAML] From e6cc3b85cfd44832a51178f9482dfb6aedd83681 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 26 Jun 2023 17:07:54 +0000 Subject: [PATCH 07/30] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/autoflake: v2.0.2 → v2.2.0](https://github.com/PyCQA/autoflake/compare/v2.0.2...v2.2.0) - [github.com/pre-commit/mirrors-mypy: v1.2.0 → v1.4.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.2.0...v1.4.1) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f18d7c4..c9fd9c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,7 +20,7 @@ repos: hooks: - id: isort - repo: https://github.com/PyCQA/autoflake - rev: v2.0.2 + rev: v2.2.0 hooks: - id: autoflake args: [--in-place, --remove-all-unused-imports] @@ -34,7 +34,7 @@ repos: - id: flake8 additional_dependencies: [flake8-typing-imports] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.2.0 + rev: v1.4.1 hooks: - id: mypy additional_dependencies: [types-requests, types-PyYAML] From 4896d5fd8435dc2848032c5b6b5217b7fa8e3862 Mon Sep 17 00:00:00 2001 From: "Shah, Sandip P" <68666105+sandip-shah@users.noreply.github.com> Date: Thu, 29 Jun 2023 22:49:53 -0700 Subject: [PATCH 08/30] Update mcd_file.py I work for StandardBio, and it is a known thing that the last scan sometimes is not complete. This generates an incomplete numpy array for that acquisition. The default "read_acquisition" is called with "strict=True", but when called with "strict=False", it will use populate the missing data in the last scan with "zeros" and allow the retrieval of the acquisition. --- readimc/mcd_file.py | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index 0e13e87..c1dbd19 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -90,7 +90,7 @@ def close(self) -> None: self._fh.close() self._fh = None - def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndarray: + def read_acquisition(self, acquisition: Optional[Acquisition] = None, strict = True) -> np.ndarray: """Reads IMC acquisition data as numpy array. :param acquisition: the acquisition to read @@ -120,13 +120,21 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndar num_channels = acquisition.num_channels data_size = data_end_offset - data_start_offset bytes_per_pixel = (num_channels + 3) * value_bytes - if data_size % bytes_per_pixel != 0: + + if data_size % bytes_per_pixel != 0: data_size += 1 + if data_size % bytes_per_pixel != 0: - raise IOError( + if strict == True: + raise IOError( f"MCD file '{self.path.name}' corrupted: " "invalid acquisition image data size" - ) + ) + else: # strict == False, print error and continue + print( + f"MCD file '{self.path.name}' corrupted: " + "invalid acquisition image data size" + ) num_pixels = data_size // bytes_per_pixel self._fh.seek(0) data = np.memmap( @@ -136,12 +144,27 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndar offset=data_start_offset, shape=(num_pixels, num_channels + 3), ) - width, height = np.amax(data[:, :2], axis=0).astype(int) + 1 - if width * height != data.shape[0]: - raise IOError( - f"MCD file '{self.path.name}' corrupted: " - "inconsistent acquisition image data size" - ) + + if strict == True: # default behavior + width, height = np.amax(data[:, :2], axis=0).astype(int) + 1 + if width * height != data.shape[0]: + raise IOError( + f"MCD file '{self.path.name}' corrupted: " + "inconsistent acquisition image data size" + ) + + else: #strict == False + width = int(acquisition.metadata["MaxX"]) + height = int(acquisition.metadata["MaxY"]) + if width * height != num_pixels: + print( + f"MCD file '{self.path.name}' corrupted: " + "inconsistent acquisition image data size" + ) + append_pixel = (width * height)-num_pixels + append_data = np.zeros((append_pixel, num_channels), dtype=np.float32) + np.append(data, append_data) + img = np.zeros((height, width, num_channels), dtype=np.float32) img[data[:, 1].astype(int), data[:, 0].astype(int), :] = data[:, 3:] return np.moveaxis(img, -1, 0) From c3ce23eed05155cc3e3f99fbf06ae39d1985c03f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 30 Jun 2023 05:52:15 +0000 Subject: [PATCH 09/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- readimc/mcd_file.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index c1dbd19..afc9c6f 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -90,7 +90,9 @@ def close(self) -> None: self._fh.close() self._fh = None - def read_acquisition(self, acquisition: Optional[Acquisition] = None, strict = True) -> np.ndarray: + def read_acquisition( + self, acquisition: Optional[Acquisition] = None, strict=True + ) -> np.ndarray: """Reads IMC acquisition data as numpy array. :param acquisition: the acquisition to read @@ -120,20 +122,20 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None, strict = T num_channels = acquisition.num_channels data_size = data_end_offset - data_start_offset bytes_per_pixel = (num_channels + 3) * value_bytes - - if data_size % bytes_per_pixel != 0: + + if data_size % bytes_per_pixel != 0: data_size += 1 - + if data_size % bytes_per_pixel != 0: if strict == True: raise IOError( - f"MCD file '{self.path.name}' corrupted: " - "invalid acquisition image data size" + f"MCD file '{self.path.name}' corrupted: " + "invalid acquisition image data size" ) - else: # strict == False, print error and continue + else: # strict == False, print error and continue print( - f"MCD file '{self.path.name}' corrupted: " - "invalid acquisition image data size" + f"MCD file '{self.path.name}' corrupted: " + "invalid acquisition image data size" ) num_pixels = data_size // bytes_per_pixel self._fh.seek(0) @@ -144,27 +146,27 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None, strict = T offset=data_start_offset, shape=(num_pixels, num_channels + 3), ) - - if strict == True: # default behavior + + if strict == True: # default behavior width, height = np.amax(data[:, :2], axis=0).astype(int) + 1 if width * height != data.shape[0]: raise IOError( f"MCD file '{self.path.name}' corrupted: " "inconsistent acquisition image data size" - ) - - else: #strict == False + ) + + else: # strict == False width = int(acquisition.metadata["MaxX"]) height = int(acquisition.metadata["MaxY"]) if width * height != num_pixels: print( f"MCD file '{self.path.name}' corrupted: " "inconsistent acquisition image data size" - ) - append_pixel = (width * height)-num_pixels + ) + append_pixel = (width * height) - num_pixels append_data = np.zeros((append_pixel, num_channels), dtype=np.float32) np.append(data, append_data) - + img = np.zeros((height, width, num_channels), dtype=np.float32) img[data[:, 1].astype(int), data[:, 0].astype(int), :] = data[:, 3:] return np.moveaxis(img, -1, 0) From 22edb8c459dccd338c953d17a28ad012007dd30b Mon Sep 17 00:00:00 2001 From: "Shah, Sandip P" <68666105+sandip-shah@users.noreply.github.com> Date: Fri, 30 Jun 2023 09:04:09 -0700 Subject: [PATCH 10/30] Update readimc/mcd_file.py Co-authored-by: Jonas Windhager --- readimc/mcd_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index afc9c6f..01dde17 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -91,7 +91,7 @@ def close(self) -> None: self._fh = None def read_acquisition( - self, acquisition: Optional[Acquisition] = None, strict=True + self, acquisition: Optional[Acquisition] = None, strict: bool = True ) -> np.ndarray: """Reads IMC acquisition data as numpy array. From 34a4d1e1ab1f19c085bab618b658b042f5eb0fe3 Mon Sep 17 00:00:00 2001 From: "Shah, Sandip P" <68666105+sandip-shah@users.noreply.github.com> Date: Fri, 30 Jun 2023 09:05:17 -0700 Subject: [PATCH 11/30] Update readimc/mcd_file.py Co-authored-by: Jonas Windhager --- readimc/mcd_file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index 01dde17..59baa5f 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -122,7 +122,6 @@ def read_acquisition( num_channels = acquisition.num_channels data_size = data_end_offset - data_start_offset bytes_per_pixel = (num_channels + 3) * value_bytes - if data_size % bytes_per_pixel != 0: data_size += 1 From 06fb434b6e43e67d549581381a0eafe212ea3a1b Mon Sep 17 00:00:00 2001 From: "Shah, Sandip P" <68666105+sandip-shah@users.noreply.github.com> Date: Fri, 30 Jun 2023 09:05:38 -0700 Subject: [PATCH 12/30] Update readimc/mcd_file.py Co-authored-by: Jonas Windhager --- readimc/mcd_file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index 59baa5f..664694a 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -124,7 +124,6 @@ def read_acquisition( bytes_per_pixel = (num_channels + 3) * value_bytes if data_size % bytes_per_pixel != 0: data_size += 1 - if data_size % bytes_per_pixel != 0: if strict == True: raise IOError( From b685d976eeb90eea8f1132b4ef8499c195ab2ea5 Mon Sep 17 00:00:00 2001 From: "Shah, Sandip P" <68666105+sandip-shah@users.noreply.github.com> Date: Fri, 30 Jun 2023 09:06:27 -0700 Subject: [PATCH 13/30] Update readimc/mcd_file.py Co-authored-by: Jonas Windhager --- readimc/mcd_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index 664694a..e58dfd3 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -125,7 +125,7 @@ def read_acquisition( if data_size % bytes_per_pixel != 0: data_size += 1 if data_size % bytes_per_pixel != 0: - if strict == True: + if strict: raise IOError( f"MCD file '{self.path.name}' corrupted: " "invalid acquisition image data size" From f78ddec85b43961d956f4dec10540fd9fec893b8 Mon Sep 17 00:00:00 2001 From: "Shah, Sandip P" <68666105+sandip-shah@users.noreply.github.com> Date: Fri, 30 Jun 2023 09:07:32 -0700 Subject: [PATCH 14/30] Update readimc/mcd_file.py Co-authored-by: Jonas Windhager --- readimc/mcd_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index e58dfd3..ec78e9f 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -130,7 +130,7 @@ def read_acquisition( f"MCD file '{self.path.name}' corrupted: " "invalid acquisition image data size" ) - else: # strict == False, print error and continue + else: print( f"MCD file '{self.path.name}' corrupted: " "invalid acquisition image data size" From 3a0000607d5303cc4a5788342258a9844f6fa97a Mon Sep 17 00:00:00 2001 From: "Shah, Sandip P" <68666105+sandip-shah@users.noreply.github.com> Date: Fri, 30 Jun 2023 09:07:46 -0700 Subject: [PATCH 15/30] Update readimc/mcd_file.py Co-authored-by: Jonas Windhager --- readimc/mcd_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index ec78e9f..06e7f84 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -131,7 +131,7 @@ def read_acquisition( "invalid acquisition image data size" ) else: - print( + warn( f"MCD file '{self.path.name}' corrupted: " "invalid acquisition image data size" ) From 3e764837027673ccbd00c46c9a65cefc5b586132 Mon Sep 17 00:00:00 2001 From: "Shah, Sandip P" <68666105+sandip-shah@users.noreply.github.com> Date: Fri, 30 Jun 2023 09:08:14 -0700 Subject: [PATCH 16/30] Update readimc/mcd_file.py Co-authored-by: Jonas Windhager --- readimc/mcd_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index 06e7f84..9e38b84 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -153,7 +153,7 @@ def read_acquisition( "inconsistent acquisition image data size" ) - else: # strict == False + else: width = int(acquisition.metadata["MaxX"]) height = int(acquisition.metadata["MaxY"]) if width * height != num_pixels: From 39882094b8065803615121d0263781e2c33276e1 Mon Sep 17 00:00:00 2001 From: "Shah, Sandip P" <68666105+sandip-shah@users.noreply.github.com> Date: Fri, 30 Jun 2023 09:11:28 -0700 Subject: [PATCH 17/30] Update readimc/mcd_file.py Co-authored-by: Jonas Windhager --- readimc/mcd_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index 9e38b84..a191e3a 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -145,7 +145,7 @@ def read_acquisition( shape=(num_pixels, num_channels + 3), ) - if strict == True: # default behavior + if strict: width, height = np.amax(data[:, :2], axis=0).astype(int) + 1 if width * height != data.shape[0]: raise IOError( From 461a9d9171147972ad839312da617864b9552790 Mon Sep 17 00:00:00 2001 From: "Shah, Sandip P" <68666105+sandip-shah@users.noreply.github.com> Date: Fri, 30 Jun 2023 09:46:06 -0700 Subject: [PATCH 18/30] Update mcd_file.py Modified np.append(data, append_data) to data = np.append(data, append_data) --- readimc/mcd_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index a191e3a..f1469d4 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -163,7 +163,7 @@ def read_acquisition( ) append_pixel = (width * height) - num_pixels append_data = np.zeros((append_pixel, num_channels), dtype=np.float32) - np.append(data, append_data) + data = np.append(data, append_data) img = np.zeros((height, width, num_channels), dtype=np.float32) img[data[:, 1].astype(int), data[:, 0].astype(int), :] = data[:, 3:] From a8cb18ec6569a2ed37926631acbd820217408106 Mon Sep 17 00:00:00 2001 From: Jonas Windhager Date: Thu, 6 Jul 2023 13:32:30 +0200 Subject: [PATCH 19/30] check for overlapping memory blocks --- readimc/mcd_parser.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/readimc/mcd_parser.py b/readimc/mcd_parser.py index d4d501e..248b273 100644 --- a/readimc/mcd_parser.py +++ b/readimc/mcd_parser.py @@ -122,17 +122,15 @@ def _parse_slide(self, slide_elem: ET.Element) -> Slide: slide.acquisitions.append(acquisition) if panorama is not None: panorama.acquisitions.append(acquisition) - - # Check for possible overlap of memory blocks between qcquisitions: for a, b in itertools.combinations(slide.acquisitions, 2): a_start = a.metadata["DataStartOffset"] a_end = a.metadata["DataEndOffset"] b_start = b.metadata["DataStartOffset"] b_end = b.metadata["DataEndOffset"] - # TODO change 'slide_elem[2].text' to select by element names for modularity if b_start <= a_start < b_end or b_start < a_end <= b_end: warn( - f" {slide_elem[2].text} appears to be curropted. There are memory blocks that map to both acquisitions {a.id} and {b.id}. In an uncurropted file a given memory block should map to only one acquisition" + f"Slide {slide.id} corrupted: " + f"overlapping memory blocks for acquisitions {a.id} and {b.id}" ) slide.panoramas.sort(key=lambda panorama: panorama.id) slide.acquisitions.sort(key=lambda acquisition: acquisition.id) From bcd46e426f746f560ad7e0b87dc8edb311531f22 Mon Sep 17 00:00:00 2001 From: Jonas Windhager Date: Thu, 6 Jul 2023 13:44:43 +0200 Subject: [PATCH 20/30] add docstring --- readimc/mcd_file.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index f1469d4..98fd3cc 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -96,6 +96,7 @@ def read_acquisition( """Reads IMC acquisition data as numpy array. :param acquisition: the acquisition to read + :param strict: set this parameter to False to try to recover corrupted data :return: the acquisition data as 32-bit floating point array, shape: (c, y, x) """ @@ -130,11 +131,10 @@ def read_acquisition( f"MCD file '{self.path.name}' corrupted: " "invalid acquisition image data size" ) - else: - warn( - f"MCD file '{self.path.name}' corrupted: " - "invalid acquisition image data size" - ) + warn( + f"MCD file '{self.path.name}' corrupted: " + "invalid acquisition image data size" + ) num_pixels = data_size // bytes_per_pixel self._fh.seek(0) data = np.memmap( @@ -144,7 +144,6 @@ def read_acquisition( offset=data_start_offset, shape=(num_pixels, num_channels + 3), ) - if strict: width, height = np.amax(data[:, :2], axis=0).astype(int) + 1 if width * height != data.shape[0]: @@ -164,7 +163,6 @@ def read_acquisition( append_pixel = (width * height) - num_pixels append_data = np.zeros((append_pixel, num_channels), dtype=np.float32) data = np.append(data, append_data) - img = np.zeros((height, width, num_channels), dtype=np.float32) img[data[:, 1].astype(int), data[:, 0].astype(int), :] = data[:, 3:] return np.moveaxis(img, -1, 0) From c56862053ed9a33521360a760cfb50e0df9bdec5 Mon Sep 17 00:00:00 2001 From: Jonas Windhager Date: Thu, 6 Jul 2023 14:21:53 +0200 Subject: [PATCH 21/30] default to MaxX/MaxY, fix appending --- readimc/mcd_file.py | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index 98fd3cc..e756ea6 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -144,28 +144,35 @@ def read_acquisition( offset=data_start_offset, shape=(num_pixels, num_channels + 3), ) - if strict: - width, height = np.amax(data[:, :2], axis=0).astype(int) + 1 - if width * height != data.shape[0]: - raise IOError( - f"MCD file '{self.path.name}' corrupted: " - "inconsistent acquisition image data size" + xs = data[:, 0].astype(int) + ys = data[:, 1].astype(int) + try: + width = int(acquisition.metadata["MaxX"]) + 1 + height = int(acquisition.metadata["MaxY"]) + 1 + if width <= np.amax(xs) or height <= np.amax(ys): + raise ValueError( + "data shape is incompatible with acquisition image dimensions" ) - - else: - width = int(acquisition.metadata["MaxX"]) - height = int(acquisition.metadata["MaxY"]) - if width * height != num_pixels: - print( + except (KeyError, ValueError): + warn( + f"MCD file '{self.path.name}' corrupted: " + "cannot read acquisition image dimensions; recovering from data shape" + ) + width = np.amax(xs) + 1 + height = np.amax(ys) + 1 + if width * height != data.shape[0]: + if strict: + raise IOError( f"MCD file '{self.path.name}' corrupted: " "inconsistent acquisition image data size" ) - append_pixel = (width * height) - num_pixels - append_data = np.zeros((append_pixel, num_channels), dtype=np.float32) - data = np.append(data, append_data) - img = np.zeros((height, width, num_channels), dtype=np.float32) - img[data[:, 1].astype(int), data[:, 0].astype(int), :] = data[:, 3:] - return np.moveaxis(img, -1, 0) + warn( + f"MCD file '{self.path.name}' corrupted: " + "inconsistent acquisition image data size" + ) + img = np.zeros((num_channels, height, width), dtype=np.float32) + img[:, ys, xs] = np.transpose(data[:, 3:]) + return img def read_slide(self, slide: Slide) -> Optional[np.ndarray]: """Reads and decodes a slide image as numpy array using the ``imageio`` From 531e97ac027edc5c42f82d9ba0a08c810f544389 Mon Sep 17 00:00:00 2001 From: Jonas Windhager Date: Thu, 6 Jul 2023 14:34:57 +0200 Subject: [PATCH 22/30] switch to ruff --- .flake8 | 3 --- .isort.cfg | 2 -- .pre-commit-config.yaml | 18 ++++-------------- pyproject.toml | 7 +++++++ pytest.ini | 4 ---- requirements_devel.txt | 3 +-- setup.cfg | 4 ++++ 7 files changed, 16 insertions(+), 25 deletions(-) delete mode 100644 .flake8 delete mode 100644 .isort.cfg delete mode 100644 pytest.ini diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 8dd399a..0000000 --- a/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 88 -extend-ignore = E203 diff --git a/.isort.cfg b/.isort.cfg deleted file mode 100644 index b9fb3f3..0000000 --- a/.isort.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[settings] -profile=black diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c9fd9c7..2d48eeb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,24 +15,15 @@ repos: - id: end-of-file-fixer - id: requirements-txt-fixer - id: trailing-whitespace - - repo: https://github.com/PyCQA/isort - rev: "5.12.0" + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.277 hooks: - - id: isort - - repo: https://github.com/PyCQA/autoflake - rev: v2.2.0 - hooks: - - id: autoflake - args: [--in-place, --remove-all-unused-imports] + - id: ruff + args: [--fix, --exit-non-zero-on-fix] - repo: https://github.com/psf/black rev: '23.3.0' hooks: - id: black - - repo: https://github.com/PyCQA/flake8 - rev: "6.0.0" - hooks: - - id: flake8 - additional_dependencies: [flake8-typing-imports] - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.4.1 hooks: @@ -40,4 +31,3 @@ repos: additional_dependencies: [types-requests, types-PyYAML] ci: autoupdate_branch: develop - skip: [flake8, mypy] diff --git a/pyproject.toml b/pyproject.toml index dfee5e0..e493034 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,5 +2,12 @@ requires = ["setuptools>=64", "wheel", "setuptools_scm[toml]>=6.2"] build-backend = "setuptools.build_meta" +[tool.pytest.ini_options] +addopts = ["--cov=readimc", "--cov-report=xml:coverage.xml"] +testpaths = ["tests"] + +[tool.ruff] +select = ["E", "F", "I"] + [tool.setuptools_scm] write_to = "readimc/_version.py" diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 1261316..0000000 --- a/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -[pytest] -addopts = --cov=readimc --cov-report xml:coverage.xml -testpaths = - tests diff --git a/requirements_devel.txt b/requirements_devel.txt index fb7a1bb..4abdba6 100644 --- a/requirements_devel.txt +++ b/requirements_devel.txt @@ -1,5 +1,4 @@ black -flake8 -isort mypy pre-commit +ruff diff --git a/setup.cfg b/setup.cfg index 16d411f..2776a70 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,3 +28,7 @@ install_requires = pandas python_requires = >=3.8 packages = find: + +[flake8] +max-line-length = 88 +extend-ignore = E203 From 9326edd1794f3959dc70a7721095f7c448497089 Mon Sep 17 00:00:00 2001 From: Jonas Windhager Date: Thu, 6 Jul 2023 14:43:20 +0200 Subject: [PATCH 23/30] MaxX/MaxY contains size, not index --- readimc/mcd_file.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readimc/mcd_file.py b/readimc/mcd_file.py index e756ea6..d49ed0b 100644 --- a/readimc/mcd_file.py +++ b/readimc/mcd_file.py @@ -147,8 +147,8 @@ def read_acquisition( xs = data[:, 0].astype(int) ys = data[:, 1].astype(int) try: - width = int(acquisition.metadata["MaxX"]) + 1 - height = int(acquisition.metadata["MaxY"]) + 1 + width = int(acquisition.metadata["MaxX"]) + height = int(acquisition.metadata["MaxY"]) if width <= np.amax(xs) or height <= np.amax(ys): raise ValueError( "data shape is incompatible with acquisition image dimensions" From a1c4d931ecaaac2a67135a224cf3f89d4fc11d39 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 7 Aug 2023 17:14:25 +0000 Subject: [PATCH 24/30] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - https://github.com/charliermarsh/ruff-pre-commit → https://github.com/astral-sh/ruff-pre-commit - [github.com/astral-sh/ruff-pre-commit: v0.0.277 → v0.0.282](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.277...v0.0.282) - [github.com/psf/black: 23.3.0 → 23.7.0](https://github.com/psf/black/compare/23.3.0...23.7.0) --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2d48eeb..af0e053 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,13 +15,13 @@ repos: - id: end-of-file-fixer - id: requirements-txt-fixer - id: trailing-whitespace - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.277 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.282 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - repo: https://github.com/psf/black - rev: '23.3.0' + rev: '23.7.0' hooks: - id: black - repo: https://github.com/pre-commit/mirrors-mypy From 28693dc717ad121f5889d43e295926646a2e1dfa Mon Sep 17 00:00:00 2001 From: Milad Adibi Date: Fri, 11 Aug 2023 14:32:22 +0200 Subject: [PATCH 25/30] =?UTF-8?q?=C3=9Cpdated=20CHANGELOG.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 055fe21..c4686dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.6.3] - 2023-08-11 + +Maintenance release + +Implemented checks for overlapping raw data blocks in MCD file metadata [#6](https://github.com/BodenmillerGroup/readimc/issues/6) + +Implemented lenient extraction of corrupted imaging data [#19](https://github.com/BodenmillerGroup/readimc/pull/19) + ## [0.6.2] - 2023-01-31 Maintenance release From 2ef13be88df5e4e73437fbadfaa5f4cab44d9d8c Mon Sep 17 00:00:00 2001 From: Milad Adibi Date: Fri, 11 Aug 2023 15:16:09 +0200 Subject: [PATCH 26/30] Bumped the minor version instead of patch version --- CHANGELOG.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4686dc..7ac7330 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,9 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.6.3] - 2023-08-11 - -Maintenance release +## [0.7.0] - 2023-08-11 Implemented checks for overlapping raw data blocks in MCD file metadata [#6](https://github.com/BodenmillerGroup/readimc/issues/6) From 8bbb3cfab1440f2ed41c7f16dd1fdfb1f24e2a28 Mon Sep 17 00:00:00 2001 From: leorro Date: Sun, 3 Sep 2023 20:18:49 +0300 Subject: [PATCH 27/30] update txt_file.py --- readimc/txt_file.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/readimc/txt_file.py b/readimc/txt_file.py index 4f92926..48ea7a7 100644 --- a/readimc/txt_file.py +++ b/readimc/txt_file.py @@ -93,11 +93,12 @@ def close(self) -> None: self._fh.close() self._fh = None - def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndarray: + def read_acquisition(self, acquisition: Optional[Acquisition] = None, strict: bool = True) -> np.ndarray: """Reads IMC acquisition data as numpy array. :param acquisition: the acquisition to read (for compatibility with ``IMCFile`` and ``MCDFile``; unused) + :param strict: set this parameter to False to try to recover corrupted data :return: the acquisition data as 32-bit floating point array, shape: (c, y, x) """ @@ -121,7 +122,12 @@ def read_acquisition(self, acquisition: Optional[Acquisition] = None) -> np.ndar ) width, height = df[["X", "Y"]].add(1).max(axis=0).astype(int) if width * height != len(df.index): - raise IOError( + if strict: + raise IOError( + f"TXT file '{self.path.name}' corrupted: " + "inconsistent acquisition image data size" + ) + warn( f"TXT file '{self.path.name}' corrupted: " "inconsistent acquisition image data size" ) From 670adb8faf419029128e29dbfbfedd8cf33d0694 Mon Sep 17 00:00:00 2001 From: leorro Date: Sun, 3 Sep 2023 20:42:36 +0300 Subject: [PATCH 28/30] update txt_file.py --- readimc/txt_file.py | 1 + 1 file changed, 1 insertion(+) diff --git a/readimc/txt_file.py b/readimc/txt_file.py index 48ea7a7..6209cc1 100644 --- a/readimc/txt_file.py +++ b/readimc/txt_file.py @@ -1,6 +1,7 @@ import re from os import PathLike from typing import List, Optional, Sequence, TextIO, Tuple, Union +from warnings import warn import numpy as np import pandas as pd From 43455439a2343b660d38bbab75fca5bceccb5088 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 3 Sep 2023 17:51:24 +0000 Subject: [PATCH 29/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- readimc/txt_file.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/readimc/txt_file.py b/readimc/txt_file.py index 6209cc1..b8e7bd6 100644 --- a/readimc/txt_file.py +++ b/readimc/txt_file.py @@ -94,7 +94,9 @@ def close(self) -> None: self._fh.close() self._fh = None - def read_acquisition(self, acquisition: Optional[Acquisition] = None, strict: bool = True) -> np.ndarray: + def read_acquisition( + self, acquisition: Optional[Acquisition] = None, strict: bool = True + ) -> np.ndarray: """Reads IMC acquisition data as numpy array. :param acquisition: the acquisition to read (for compatibility with ``IMCFile`` From 8eef98d1ad303d2c2397915881456fc481e9f639 Mon Sep 17 00:00:00 2001 From: Milad4849 <70942846+Milad4849@users.noreply.github.com> Date: Thu, 23 Nov 2023 12:08:30 +0100 Subject: [PATCH 30/30] added link for 0.7.0 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ac7330..b279f5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -103,7 +103,7 @@ Minor documentation changes ## [0.1.0] - 2021-10-09 Initial release - +[0.7.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.2...v0.7.0 [0.6.2]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.1...v0.6.2 [0.6.1]: https://github.com/BodenmillerGroup/readimc/compare/v0.6.0...v0.6.1 [0.6.0]: https://github.com/BodenmillerGroup/readimc/compare/v0.5.0...v0.6.0