Skip to content

Commit

Permalink
Adaptive resizing for masked images (#177)
Browse files Browse the repository at this point in the history
Especially at resolutions of 1080p or higher, if the non-transparent area of the mask image used for a split is small, resizing the entire image to 320x240 loses the information inside the mask. One way to prevent this is to adaptively determine the target size according to the number of nonzero elements in the alpha channel of the split image. At least for the L2 norm and histogram comparison methods, such a change would not affect performance, since they only use information from the mask's interior. Masks are not recommended with pHash anyway.

Co-authored-by: zalgo3 <[email protected]>
  • Loading branch information
Avasam and zalgo3 authored Oct 29, 2022
1 parent af21cfd commit 5873117
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 20 deletions.
9 changes: 2 additions & 7 deletions src/AutoSplit.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import error_messages
import user_profile
from AutoControlledWorker import AutoControlledWorker
from AutoSplitImage import COMPARISON_RESIZE, START_KEYWORD, AutoSplitImage, ImageType
from AutoSplitImage import START_KEYWORD, AutoSplitImage, ImageType
from capture_method import CaptureMethodBase, CaptureMethodEnum
from gen import about, design, settings, update_checker
from hotkeys import HOTKEYS, after_setting_hotkey, send_command
Expand Down Expand Up @@ -772,12 +772,7 @@ def __get_capture_for_comparison(self):
if recovered:
capture, _ = self.capture_method.get_frame(self)

return (
None
if not is_valid_image(capture)
else cv2.resize(capture, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST),
is_old_image,
)
return capture, is_old_image

def __reset_if_should(self, capture: cv2.Mat | None):
"""
Expand Down
34 changes: 26 additions & 8 deletions src/AutoSplitImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
COMPARISON_RESIZE_WIDTH = 320
COMPARISON_RESIZE_HEIGHT = 240
COMPARISON_RESIZE = (COMPARISON_RESIZE_WIDTH, COMPARISON_RESIZE_HEIGHT)
LOWER_BOUND = np.array([0, 0, 0, 1], dtype="uint8")
UPPER_BOUND = np.array([MAXBYTE, MAXBYTE, MAXBYTE, MAXBYTE], dtype="uint8")
COMPARISON_RESIZE_AREA = COMPARISON_RESIZE_WIDTH * COMPARISON_RESIZE_HEIGHT
MASK_LOWER_BOUND = np.array([1], dtype="uint8")
MASK_UPPER_BOUND = np.array([MAXBYTE], dtype="uint8")
START_KEYWORD = "start_auto_splitter"
RESET_KEYWORD = "reset"

Expand Down Expand Up @@ -108,15 +109,31 @@ def __read_image_bytes(self, path: str):
error_messages.image_type(path)
return

image = cv2.resize(image, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST)
self._has_transparency = check_if_image_has_transparency(image)
# If image has transparency, create a mask
if self._has_transparency:
# Create mask based on resized, nearest neighbor interpolated split image
self.mask = cv2.inRange(image, LOWER_BOUND, UPPER_BOUND)
# Add Alpha channel if missing
elif image.shape[2] == 3:
image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
# Adaptively determine the target size according to
# the number of nonzero elements in the alpha channel of the split image.
# This may result in images bigger than COMPARISON_RESIZE if there's plenty of transparency.
# Which wouldn't incur any performance loss in methods where masked regions are ignored.
alpha_channel = image[:, :, 3]
scale = min(1, (COMPARISON_RESIZE_AREA / cv2.countNonZero(alpha_channel)) ** 0.5)

image = cv2.resize(
image,
dsize=None,
fx=scale,
fy=scale,
interpolation=cv2.INTER_NEAREST,
)

# Mask based on adaptively resized, nearest neighbor interpolated split image
self.mask = cv2.inRange(alpha_channel, MASK_LOWER_BOUND, MASK_UPPER_BOUND)
else:
image = cv2.resize(image, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST)
# Add Alpha channel if missing
if image.shape[2] == 3:
image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)

self.byte_array = image

Expand All @@ -134,6 +151,7 @@ def compare_with_capture(

if not is_valid_image(self.byte_array) or not is_valid_image(capture):
return 0.0
capture = cv2.resize(capture, self.byte_array.shape[1::-1])
comparison_method = self.__get_comparison_method(default)
if comparison_method == 0:
return compare_l2_norm(self.byte_array, capture, self.mask)
Expand Down
5 changes: 2 additions & 3 deletions src/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import cv2
import imagehash
import numpy as np
from PIL import Image
from win32con import MAXBYTE

Expand Down Expand Up @@ -49,7 +48,7 @@ def compare_l2_norm(source: cv2.Mat, capture: cv2.Mat, mask: cv2.Mat | None = No
# The L2 Error is summed across all pixels, so this normalizes
max_error: float = (source.size ** 0.5) * MAXBYTE \
if not is_valid_image(mask)\
else (3 * np.count_nonzero(mask) * MAXBYTE * MAXBYTE) ** 0.5
else (3 * cv2.countNonZero(mask) * MAXBYTE * MAXBYTE) ** 0.5

if not max_error:
return 0.0
Expand All @@ -75,7 +74,7 @@ def compare_template(source: cv2.Mat, capture: cv2.Mat, mask: cv2.Mat | None = N
# that the value can be. Used for normalizing from 0 to 1.
max_error = source.size * MAXBYTE * MAXBYTE \
if not is_valid_image(mask) \
else np.count_nonzero(mask)
else cv2.countNonZero(mask)

return 1 - (min_val / max_error)

Expand Down
4 changes: 2 additions & 2 deletions typings/cv2/cv2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -4391,7 +4391,7 @@ def cornerHarris(src: Mat, blockSize, ksize, k, dst: Mat = ..., borderType=...)
def cornerMinEigenVal(src: Mat, blockSize, dst: Mat = ..., ksize=..., borderType=...) -> _dst: ...
def cornerSubPix(image: Mat, corners, winSize, zeroZone, criteria) -> _corners: ...
def correctMatches(F, points1, points2, newPoints1=..., newPoints2=...) -> tuple[_newPoints1, _newPoints2]: ...
def countNonZero(src): ...
def countNonZero(src: Mat | _NumericScalar) -> int: ...
def createAlignMTB(max_bits=..., exclude_range=..., cut=...): ...
def createBackgroundSubtractorKNN(history=..., dist2Threshold=..., detectShadows=...): ...
def createBackgroundSubtractorMOG2(history=..., varThreshold=..., detectShadows=...): ...
Expand Down Expand Up @@ -4948,7 +4948,7 @@ def reprojectImageTo3D(disparity, Q, _3dImage=..., handleMissingValues=..., ddep


def resize(
src: Mat, dsize: _Size, dst: Mat = ..., fx: float = ...,
src: Mat | int | bool, dsize: _Size | None, dst: Mat | _NumericScalar = ..., fx: float = ...,
fy: float = ..., interpolation: int = ...,
) -> Mat: ...

Expand Down

0 comments on commit 5873117

Please sign in to comment.