Adaptive resizing for masked images (#177)

Especially with captures of 1080p or higher, if the non-transparent area of the mask image used for a split is small, resizing the entire image to 320x240 loses the information inside the mask. To prevent this, the target size is determined adaptively from the number of nonzero elements in the alpha channel of the split image. At least for the L2 norm and histogram methods, such a change would not affect performance, since they only use information from the mask's interior. Masks are not recommended with pHash anyway.

Co-authored-by: zalgo3 <[email protected]>
Toufool · Oct 29, 2022 · 5873117 · 5873117
1 parent af21cfd
commit 5873117
Show file tree

Hide file tree

Showing 4 changed files with 32 additions and 20 deletions.
diff --git a/src/AutoSplit.py b/src/AutoSplit.py
@@ -19,7 +19,7 @@
 import error_messages
 import user_profile
 from AutoControlledWorker import AutoControlledWorker
-from AutoSplitImage import COMPARISON_RESIZE, START_KEYWORD, AutoSplitImage, ImageType
+from AutoSplitImage import START_KEYWORD, AutoSplitImage, ImageType
 from capture_method import CaptureMethodBase, CaptureMethodEnum
 from gen import about, design, settings, update_checker
 from hotkeys import HOTKEYS, after_setting_hotkey, send_command
@@ -772,12 +772,7 @@ def __get_capture_for_comparison(self):
                 if recovered:
                     capture, _ = self.capture_method.get_frame(self)
 
-        return (
-            None
-            if not is_valid_image(capture)
-            else cv2.resize(capture, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST),
-            is_old_image,
-        )
+        return capture, is_old_image
 
     def __reset_if_should(self, capture: cv2.Mat | None):
         """

diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py
@@ -19,8 +19,9 @@
 COMPARISON_RESIZE_WIDTH = 320
 COMPARISON_RESIZE_HEIGHT = 240
 COMPARISON_RESIZE = (COMPARISON_RESIZE_WIDTH, COMPARISON_RESIZE_HEIGHT)
-LOWER_BOUND = np.array([0, 0, 0, 1], dtype="uint8")
-UPPER_BOUND = np.array([MAXBYTE, MAXBYTE, MAXBYTE, MAXBYTE], dtype="uint8")
+COMPARISON_RESIZE_AREA = COMPARISON_RESIZE_WIDTH * COMPARISON_RESIZE_HEIGHT
+MASK_LOWER_BOUND = np.array([1], dtype="uint8")
+MASK_UPPER_BOUND = np.array([MAXBYTE], dtype="uint8")
 START_KEYWORD = "start_auto_splitter"
 RESET_KEYWORD = "reset"
 
@@ -108,15 +109,31 @@ def __read_image_bytes(self, path: str):
             error_messages.image_type(path)
             return
 
-        image = cv2.resize(image, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST)
         self._has_transparency = check_if_image_has_transparency(image)
         # If image has transparency, create a mask
         if self._has_transparency:
-            # Create mask based on resized, nearest neighbor interpolated split image
-            self.mask = cv2.inRange(image, LOWER_BOUND, UPPER_BOUND)
-        # Add Alpha channel if missing
-        elif image.shape[2] == 3:
-            image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
+            # Adaptively determine the target size according to
+            # the number of nonzero elements in the alpha channel of the split image.
+            # This may result in images bigger than COMPARISON_RESIZE if there's plenty of transparency.
+            # Which wouldn't incur any performance loss in methods where masked regions are ignored.
+            alpha_channel = image[:, :, 3]
+            scale = min(1, (COMPARISON_RESIZE_AREA / cv2.countNonZero(alpha_channel)) ** 0.5)
+
+            image = cv2.resize(
+                image,
+                dsize=None,
+                fx=scale,
+                fy=scale,
+                interpolation=cv2.INTER_NEAREST,
+            )
+
+            # Mask based on adaptively resized, nearest neighbor interpolated split image
+            self.mask = cv2.inRange(alpha_channel, MASK_LOWER_BOUND, MASK_UPPER_BOUND)
+        else:
+            image = cv2.resize(image, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST)
+            # Add Alpha channel if missing
+            if image.shape[2] == 3:
+                image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
 
         self.byte_array = image
 
@@ -134,6 +151,7 @@ def compare_with_capture(
 
         if not is_valid_image(self.byte_array) or not is_valid_image(capture):
             return 0.0
+        capture = cv2.resize(capture, self.byte_array.shape[1::-1])
         comparison_method = self.__get_comparison_method(default)
         if comparison_method == 0:
             return compare_l2_norm(self.byte_array, capture, self.mask)

diff --git a/src/compare.py b/src/compare.py
@@ -2,7 +2,6 @@
 
 import cv2
 import imagehash
-import numpy as np
 from PIL import Image
 from win32con import MAXBYTE
 
@@ -49,7 +48,7 @@ def compare_l2_norm(source: cv2.Mat, capture: cv2.Mat, mask: cv2.Mat | None = No
     # The L2 Error is summed across all pixels, so this normalizes
     max_error: float = (source.size ** 0.5) * MAXBYTE \
         if not is_valid_image(mask)\
-        else (3 * np.count_nonzero(mask) * MAXBYTE * MAXBYTE) ** 0.5
+        else (3 * cv2.countNonZero(mask) * MAXBYTE * MAXBYTE) ** 0.5
 
     if not max_error:
         return 0.0
@@ -75,7 +74,7 @@ def compare_template(source: cv2.Mat, capture: cv2.Mat, mask: cv2.Mat | None = N
     # that the value can be. Used for normalizing from 0 to 1.
     max_error = source.size * MAXBYTE * MAXBYTE \
         if not is_valid_image(mask) \
-        else np.count_nonzero(mask)
+        else cv2.countNonZero(mask)
 
     return 1 - (min_val / max_error)
 

diff --git a/typings/cv2/cv2.pyi b/typings/cv2/cv2.pyi
@@ -4391,7 +4391,7 @@ def cornerHarris(src: Mat, blockSize, ksize, k, dst: Mat = ..., borderType=...)
 def cornerMinEigenVal(src: Mat, blockSize, dst: Mat = ..., ksize=..., borderType=...) -> _dst: ...
 def cornerSubPix(image: Mat, corners, winSize, zeroZone, criteria) -> _corners: ...
 def correctMatches(F, points1, points2, newPoints1=..., newPoints2=...) -> tuple[_newPoints1, _newPoints2]: ...
-def countNonZero(src): ...
+def countNonZero(src: Mat | _NumericScalar) -> int: ...
 def createAlignMTB(max_bits=..., exclude_range=..., cut=...): ...
 def createBackgroundSubtractorKNN(history=..., dist2Threshold=..., detectShadows=...): ...
 def createBackgroundSubtractorMOG2(history=..., varThreshold=..., detectShadows=...): ...
@@ -4948,7 +4948,7 @@ def reprojectImageTo3D(disparity, Q, _3dImage=..., handleMissingValues=..., ddep
 
 
 def resize(
-    src: Mat, dsize: _Size, dst: Mat = ..., fx: float = ...,
+    src: Mat | int | bool, dsize: _Size | None, dst: Mat | _NumericScalar = ..., fx: float = ...,
     fy: float = ..., interpolation: int = ...,
 ) -> Mat: ...