Skip to content

Commit

Permalink
Batch fixes (#176)
Browse files Browse the repository at this point in the history
* pyright update

* lint-and-build cover more files

* Get IDirect3dDevice from LearningModelDevice
Closes #175

Co-authored-by: Algomancer <[email protected]>

* Ensure we're using the right camera size
And not OpenCV's default 640x480

* Fix rounding error in displayed decimal

* Revert "back to windowed"

This reverts commit 9553271.

* Fix highest similarity for start image

* Fix split below threshold when image is not valid

* Detect gray frames from OBS-Camera

Co-authored-by: Algomancer <[email protected]>
  • Loading branch information
Avasam and Algomancer authored Oct 29, 2022
1 parent 4b06065 commit af21cfd
Show file tree
Hide file tree
Showing 11 changed files with 107 additions and 60 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/lint-and-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ on:
- master
paths:
- "**.py"
- "**.pyi"
- "**.ui"
- ".github/workflows/lint-and-build.yml"
- "**/requirements.txt"
pull_request:
branches:
- main
Expand All @@ -20,6 +21,8 @@ on:
- "**.py"
- "**.pyi"
- "**.ui"
- ".github/workflows/lint-and-build.yml"
- "**/requirements*.txt"

env:
GITHUB_HEAD_REPOSITORY: ${{ github.event.pull_request.head.repo.full_name }}
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ This program can be used to automatically start, split, and reset your preferred
The smaller the selected region, the more efficient it is.
- **Windows Graphics Capture** (fast, most compatible, capped at 60fps)
Only available in Windows 10.0.17134 and up.
Due to current technical limitations, it requires having at least one audio or video Capture Device connected and enabled. Even if it won't be used.
Due to current technical limitations, Windows versions below 10.0.17763 require having at least one audio or video Capture Device connected and enabled.
Allows recording UWP apps, Hardware Accelerated and Exclusive Fullscreen windows.
Adds a yellow border on Windows 10 (not on Windows 11).
Caps at around 60 FPS.
Expand Down
1 change: 1 addition & 0 deletions scripts/build.ps1
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
& "$PSScriptRoot/compile_resources.ps1"
pyinstaller `
--onefile `
--windowed `
--additional-hooks-dir=Pyinstaller/hooks `
--icon=res/icon.ico `
--splash=res/splash.png `
Expand Down
6 changes: 1 addition & 5 deletions scripts/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,11 @@ flake8-quotes
flake8-simplify
pep8-naming
pylint>=2.14,<3.0.0 # New checks # 3.0 still in pre-release
pyright>=1.1.270 # Typeshed update
pyright>=1.1.276 # Typeshed update
unify
#
# Run `./scripts/designer.ps1` to quickly open the bundled PyQt Designer.
# Can also be downloaded externally as a non-python package
qt6-applications
# Types
types-d3dshot
types-keyboard
types-pyinstaller
types-pywin32
typing-extensions
42 changes: 21 additions & 21 deletions src/AutoSplit.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ class AutoSplit(QMainWindow, design.Ui_MainWindow):
reset_highest_similarity = 0.0

# Ensure all other attributes are defined
start_image_split_below_threshold = False
waiting_for_split_delay = False
split_below_threshold = False
run_start_time = 0.0
Expand Down Expand Up @@ -292,7 +291,7 @@ def __load_start_image(self, started_by_button: bool = False, wait_for_delay: bo

self.highest_similarity = 0.0
self.reset_highest_similarity = 0.0
self.start_image_split_below_threshold = False
self.split_below_threshold = False
self.timer_start_image.start(int(1000 / self.settings_dict["fps_limit"]))

QApplication.processEvents()
Expand All @@ -312,26 +311,25 @@ def __start_image_function(self):
if start_image_similarity > self.highest_similarity:
self.highest_similarity = start_image_similarity

self.table_current_image_threshold_label.setText(decimal(start_image_threshold))
self.table_current_image_live_label.setText(decimal(start_image_similarity))
self.table_current_image_highest_label.setText(decimal(self.highest_similarity))
self.table_current_image_threshold_label.setText(decimal(start_image_threshold))

# If the {b} flag is set, let similarity go above threshold first, then split on similarity below threshold
# Otherwise just split when similarity goes above threshold
# TODO: Abstract with similar check in split image
below_flag = self.start_image.check_flag(BELOW_FLAG)

# Negative means below threshold, positive means above
similarity_diff = start_image_similarity - start_image_threshold
if below_flag \
and not self.start_image_split_below_threshold \
and similarity_diff >= 0:
self.start_image_split_below_threshold = True
if below_flag and not self.split_below_threshold and similarity_diff >= 0:
self.split_below_threshold = True
return
if (below_flag and self.start_image_split_below_threshold and similarity_diff < 0) \
or (not below_flag and similarity_diff >= 0):
if (below_flag and self.split_below_threshold and similarity_diff < 0 and is_valid_image(capture)) \
or (not below_flag and similarity_diff >= 0): # pylint: disable=too-many-boolean-expressions

self.timer_start_image.stop()
self.start_image_split_below_threshold = False
self.split_below_threshold = False

# delay start image if needed
if self.start_image.get_delay_time(self) > 0:
Expand Down Expand Up @@ -410,6 +408,7 @@ def __check_fps(self):
while count < CHECK_FPS_ITERATIONS:
capture, is_old_image = self.__get_capture_for_comparison()
_ = image.compare_with_capture(self, capture)
# TODO: If is_old_image=true is always returned, this becomes an infinite loop
if not is_old_image:
count += 1

Expand Down Expand Up @@ -648,20 +647,22 @@ def __similarity_threshold_loop(self, number_of_split_images: int, dummy_splits_
frame_interval: float = 1 / self.settings_dict["fps_limit"]
wait_delta = int(frame_interval - (time() - start) % frame_interval)

below_flag = self.split_image.check_flag(BELOW_FLAG)
# if the b flag is set, let similarity go above threshold first,
# then split on similarity below threshold.
# if no b flag, just split when similarity goes above threshold.
# TODO: Abstract with similar check in start image
if not self.waiting_for_split_delay:
if similarity >= self.split_image.get_similarity_threshold(self):
if not self.split_image.check_flag(BELOW_FLAG):
if not below_flag:
break
if not self.split_below_threshold:
self.split_below_threshold = True
QTest.qWait(wait_delta)
continue

elif ( # pylint: disable=confusing-consecutive-elif
self.split_image.check_flag(BELOW_FLAG) and self.split_below_threshold
below_flag and self.split_below_threshold and is_valid_image(capture)
):
self.split_below_threshold = False
break
Expand Down Expand Up @@ -813,12 +814,15 @@ def __reset_if_should(self, capture: cv2.Mat | None):
return self.__check_for_reset_state_update_ui()

def __update_split_image(self, specific_image: AutoSplitImage | None = None):
# Splitting/skipping when there are no images left or Undoing past the first image
# Start image is expected to be out of range (index 0 of 0-length array)
if (not specific_image or specific_image.image_type != ImageType.START) \
and self.__is_current_split_out_of_range():
self.reset()
return
if not specific_image or specific_image.image_type != ImageType.START:
# need to reset highest_similarity and split_below_threshold each time an image updates.
self.highest_similarity = 0.0
self.split_below_threshold = False
# Splitting/skipping when there are no images left or Undoing past the first image
if self.__is_current_split_out_of_range():
self.reset()
return

# Get split image
self.split_image = specific_image or self.split_images_and_loop_number[0 + self.split_image_number][0]
Expand All @@ -835,10 +839,6 @@ def __update_split_image(self, specific_image: AutoSplitImage | None = None):
loop_tuple = self.split_images_and_loop_number[self.split_image_number]
self.image_loop_value_label.setText(f"{loop_tuple[1]}/{loop_tuple[0].loops}")

self.highest_similarity = 0.0
# need to set split below threshold to false each time an image updates.
self.split_below_threshold = False

def closeEvent(self, a0: QtGui.QCloseEvent | None = None):
"""
Exit safely when closing the window
Expand Down
35 changes: 34 additions & 1 deletion src/capture_method/VideoCaptureDeviceCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import TYPE_CHECKING

import cv2
from pygrabber import dshow_graph

from capture_method.CaptureMethodBase import CaptureMethodBase
from error_messages import CREATE_NEW_ISSUE_MESSAGE, exception_traceback
Expand All @@ -12,6 +13,19 @@
if TYPE_CHECKING:
from AutoSplit import AutoSplit

OBS_CAMERA_BLANK = [127, 129, 128]
"""The uniform gray BGR color OBS-Camera emits when it has no real frame to deliver."""


def is_blank(image: cv2.Mat):
    """
    Check whether a frame is the uniform gray placeholder emitted by OBS-Camera.

    A frame counts as blank only when EVERY pixel equals ``OBS_CAMERA_BLANK``.

    Running np.all on the entire array is extremely slow,
    because it always converts the entire array to boolean first.
    So instead we loop manually to stop early.
    """
    for row in image:
        for pixel in row:
            # A pixel differs from the blank color as soon as ANY channel differs.
            # (Using `all` here would only reject pixels where every channel
            # differs, wrongly classifying near-blank frames as blank.)
            if any(pixel != OBS_CAMERA_BLANK):
                return False
    return True


class VideoCaptureDeviceCaptureMethod(CaptureMethodBase):
capture_device: cv2.VideoCapture
Expand All @@ -35,7 +49,14 @@ def __read_loop(self, autosplit: AutoSplit):
# STS_ERROR most likely means the camera is occupied
result = False
image = None
self.last_captured_frame = image if result else None
if not result:
image = None

# Blank frame. Reuse the previous one.
if image is not None and is_blank(image):
continue

self.last_captured_frame = image
self.is_old_image = False
except Exception as exception: # pylint: disable=broad-except # We really want to catch everything here
error = exception
Expand All @@ -51,8 +72,20 @@ def __read_loop(self, autosplit: AutoSplit):

def __init__(self, autosplit: AutoSplit):
super().__init__()
filter_graph = dshow_graph.FilterGraph()
filter_graph.add_video_input_device(autosplit.settings_dict["capture_device_id"])
width, height = filter_graph.get_input_device().get_current_format()
filter_graph.remove_filters()

self.capture_device = cv2.VideoCapture(autosplit.settings_dict["capture_device_id"])
self.capture_device.setExceptionMode(True)
# Ensure we're using the right camera size. And not OpenCV's default 640x480
try:
self.capture_device.set(cv2.CAP_PROP_FRAME_WIDTH, width)
self.capture_device.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
# Some cameras don't allow changing the resolution
except cv2.error:
pass
self.stop_thread = Event()
self.capture_thread = Thread(target=lambda: self.__read_loop(autosplit))
self.capture_thread.start()
Expand Down
16 changes: 3 additions & 13 deletions src/capture_method/WindowsGraphicsCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@
from winsdk.windows.graphics.capture.interop import create_for_window
from winsdk.windows.graphics.directx import DirectXPixelFormat
from winsdk.windows.graphics.imaging import BitmapBufferAccessMode, SoftwareBitmap
from winsdk.windows.media.capture import MediaCapture

from capture_method.CaptureMethodBase import CaptureMethodBase
from utils import WINDOWS_BUILD_NUMBER, is_valid_hwnd
from utils import WINDOWS_BUILD_NUMBER, get_direct3d_device, is_valid_hwnd

if TYPE_CHECKING:
from AutoSplit import AutoSplit
Expand All @@ -33,19 +32,10 @@ def __init__(self, autosplit: AutoSplit):
super().__init__(autosplit)
if not is_valid_hwnd(autosplit.hwnd):
return
# Note: Must create in the same thread (can't use a global) otherwise when ran from LiveSplit it will raise:
# OSError: The application called an interface that was marshalled for a different thread
media_capture = MediaCapture()
item = create_for_window(autosplit.hwnd)

async def coroutine():
await (media_capture.initialize_async() or asyncio.sleep(0))
asyncio.run(coroutine())

if not media_capture.media_capture_settings:
raise OSError("Unable to initialize a Direct3D Device.")
item = create_for_window(autosplit.hwnd)
frame_pool = Direct3D11CaptureFramePool.create_free_threaded(
media_capture.media_capture_settings.direct3_d11_device,
get_direct3d_device(),
DirectXPixelFormat.B8_G8_R8_A8_UINT_NORMALIZED,
1,
item.size,
Expand Down
31 changes: 17 additions & 14 deletions src/capture_method/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,22 @@
from typing import TYPE_CHECKING, TypedDict

from pygrabber import dshow_graph
from winsdk.windows.media.capture import MediaCapture

from capture_method.BitBltCaptureMethod import BitBltCaptureMethod
from capture_method.CaptureMethodBase import CaptureMethodBase
from capture_method.DesktopDuplicationCaptureMethod import DesktopDuplicationCaptureMethod
from capture_method.ForceFullContentRenderingCaptureMethod import ForceFullContentRenderingCaptureMethod
from capture_method.VideoCaptureDeviceCaptureMethod import VideoCaptureDeviceCaptureMethod
from capture_method.WindowsGraphicsCaptureMethod import WindowsGraphicsCaptureMethod
from utils import WINDOWS_BUILD_NUMBER
from utils import WINDOWS_BUILD_NUMBER, get_direct3d_device

if TYPE_CHECKING:
from AutoSplit import AutoSplit

WGC_MIN_BUILD = 17134
"""https://docs.microsoft.com/en-us/uwp/api/windows.graphics.capture.graphicscapturepicker#applies-to"""
LEARNING_MODE_DEVICE_BUILD = 17763
"""https://learn.microsoft.com/en-us/uwp/api/windows.ai.machinelearning.learningmodeldevice"""


class Region(TypedDict):
Expand Down Expand Up @@ -121,8 +122,8 @@ def __getitem__(self, key: CaptureMethodEnum):
short_description="fast, most compatible, capped at 60fps",
description=(
f"\nOnly available in Windows 10.0.{WGC_MIN_BUILD} and up. "
"\nDue to current technical limitations, it requires having at least one "
"\naudio or video Capture Device connected and enabled. Even if it won't be used. "
f"\nDue to current technical limitations, Windows versions below 10.0.0.{LEARNING_MODE_DEVICE_BUILD}"
"\nrequire having at least one audio or video Capture Device connected and enabled."
"\nAllows recording UWP apps, Hardware Accelerated and Exclusive Fullscreen windows. "
"\nAdds a yellow border on Windows 10 (not on Windows 11)."
"\nCaps at around 60 FPS. "
Expand Down Expand Up @@ -166,21 +167,18 @@ def __getitem__(self, key: CaptureMethodEnum):
})


def test_for_media_capture():
async def coroutine():
return await (MediaCapture().initialize_async() or asyncio.sleep(0))
def try_get_direct3d_device():
try:
asyncio.run(coroutine())
return True
return get_direct3d_device()
except OSError:
return False
return None


# Detect and remove unsupported capture methods
if ( # Windows Graphics Capture requires a minimum Windows Build
WINDOWS_BUILD_NUMBER < WGC_MIN_BUILD
# Our current implementation of Windows Graphics Capture requires at least one CaptureDevice
or not test_for_media_capture()
# Our current implementation of Windows Graphics Capture does not ensure we can get an ID3DDevice
or not try_get_direct3d_device()
):
CAPTURE_METHODS.pop(CaptureMethodEnum.WINDOWS_GRAPHICS_CAPTURE)

Expand All @@ -202,10 +200,12 @@ class CameraInfo():
name: str
occupied: bool
backend: str
size: tuple[int, int]


async def get_all_video_capture_devices() -> list[CameraInfo]:
named_video_inputs = dshow_graph.FilterGraph().get_input_devices()
filter_graph = dshow_graph.FilterGraph()
named_video_inputs = filter_graph.get_input_devices()

async def get_camera_info(index: int, device_name: str):
backend = ""
Expand All @@ -225,7 +225,10 @@ async def get_camera_info(index: int, device_name: str):
# else None
# finally:
# video_capture.release()
return CameraInfo(index, device_name, False, backend)
filter_graph.add_video_input_device(index)
size = filter_graph.get_input_device().get_current_format()
filter_graph.remove_filters()
return CameraInfo(index, device_name, False, backend, size)

future = asyncio.gather(
*[
Expand Down
2 changes: 1 addition & 1 deletion src/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def compare_l2_norm(source: cv2.Mat, capture: cv2.Mat, mask: cv2.Mat | None = No
error = cv2.norm(source, capture, cv2.NORM_L2, mask)

# The L2 Error is summed across all pixels, so this normalizes
max_error = (source.size ** 0.5) * MAXBYTE \
max_error: float = (source.size ** 0.5) * MAXBYTE \
if not is_valid_image(mask)\
else (3 * np.count_nonzero(mask) * MAXBYTE * MAXBYTE) ** 0.5

Expand Down
Loading

0 comments on commit af21cfd

Please sign in to comment.