From 225d7a7bd308ce9a9c4e735cf4286bd65cc36901 Mon Sep 17 00:00:00 2001 From: Xewdy444 Date: Sun, 18 Feb 2024 11:54:15 -0600 Subject: [PATCH] Added base solver classes + Made RecaptchaBox generic --- .../recaptchav2/async_solver.py | 31 +- .../recaptchav2/base_solver.py | 313 ++++++++++++++++++ .../recaptchav2/recaptcha_box.py | 67 ++-- .../recaptchav2/sync_solver.py | 33 +- .../recaptchav3/async_solver.py | 20 +- .../recaptchav3/base_solver.py | 75 +++++ .../recaptchav3/sync_solver.py | 20 +- 7 files changed, 432 insertions(+), 127 deletions(-) create mode 100644 playwright_recaptcha/recaptchav2/base_solver.py create mode 100644 playwright_recaptcha/recaptchav3/base_solver.py diff --git a/playwright_recaptcha/recaptchav2/async_solver.py b/playwright_recaptcha/recaptchav2/async_solver.py index e90ca45..1fe883e 100644 --- a/playwright_recaptcha/recaptchav2/async_solver.py +++ b/playwright_recaptcha/recaptchav2/async_solver.py @@ -3,7 +3,6 @@ import asyncio import base64 import functools -import os import random import re from concurrent.futures import ThreadPoolExecutor @@ -12,7 +11,7 @@ from typing import Any, BinaryIO, Dict, Iterable, List, Optional, Union import speech_recognition -from playwright.async_api import APIResponse, Locator, Page, Response +from playwright.async_api import Locator, Page, Response from pydub import AudioSegment from tenacity import ( AsyncRetrying, @@ -27,6 +26,7 @@ RecaptchaRateLimitError, RecaptchaSolveError, ) +from .base_solver import BaseSolver from .recaptcha_box import AsyncRecaptchaBox @@ -60,7 +60,7 @@ async def __aexit__(self, *args: Any) -> None: await self._loop.run_in_executor(self._executor, self.__exit__, *args) -class AsyncSolver: +class AsyncSolver(BaseSolver[Page]): """ A class for solving reCAPTCHA v2 asynchronously with Playwright. @@ -75,24 +75,6 @@ class AsyncSolver: If None, the `CAPSOLVER_API_KEY` environment variable will be used. """ - def __init__( - self, page: Page, *, attempts: int = 5, capsolver_api_key: Optional[str] = None - ) -> None: - self._page = page - self._attempts = attempts - self._capsolver_api_key = capsolver_api_key or os.getenv("CAPSOLVER_API_KEY") - - self._token: Optional[str] = None - self._payload_response: Union[APIResponse, Response, None] = None - self._page.on("response", self._response_callback) - - def __repr__(self) -> str: - return ( - f"AsyncSolver(page={self._page!r}, " - f"attempts={self._attempts!r}, " - f"capsolver_api_key={self._capsolver_api_key!r})" - ) - async def __aenter__(self) -> AsyncSolver: return self @@ -547,13 +529,6 @@ async def _solve_audio_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None await self._submit_audio_text(recaptcha_box, text) - def close(self) -> None: - """Remove the response listener.""" - try: - self._page.remove_listener("response", self._response_callback) - except KeyError: - pass - async def recaptcha_is_visible(self) -> bool: """ Check if a reCAPTCHA challenge or unchecked reCAPTCHA box is visible. diff --git a/playwright_recaptcha/recaptchav2/base_solver.py b/playwright_recaptcha/recaptchav2/base_solver.py new file mode 100644 index 0000000..5be6f43 --- /dev/null +++ b/playwright_recaptcha/recaptchav2/base_solver.py @@ -0,0 +1,313 @@ +import os +from abc import ABC, abstractmethod +from typing import Any, Dict, Generic, Iterable, Optional, TypeVar, Union + +from playwright.async_api import APIResponse as AsyncAPIResponse +from playwright.async_api import Page as AsyncPage +from playwright.async_api import Response as AsyncResponse +from playwright.sync_api import APIResponse as SyncAPIResponse +from playwright.sync_api import Page as SyncPage +from playwright.sync_api import Response as SyncResponse + +from .recaptcha_box import RecaptchaBox + +PageT = TypeVar("PageT", AsyncPage, SyncPage) +APIResponse = Union[AsyncAPIResponse, SyncAPIResponse] +Response = Union[AsyncResponse, SyncResponse] + + +class BaseSolver(ABC, Generic[PageT]): + """ + The base class for reCAPTCHA v2 solvers. + + Parameters + ---------- + page : PageT + The Playwright page to solve the reCAPTCHA on. + attempts : int, optional + The number of solve attempts, by default 5. + capsolver_api_key : Optional[str], optional + The CapSolver API key, by default None. + If None, the `CAPSOLVER_API_KEY` environment variable will be used. + """ + + def __init__( + self, page: PageT, *, attempts: int = 5, capsolver_api_key: Optional[str] = None + ) -> None: + self._page = page + self._attempts = attempts + self._capsolver_api_key = capsolver_api_key or os.getenv("CAPSOLVER_API_KEY") + + self._token: Optional[str] = None + self._payload_response: Union[APIResponse, Response, None] = None + self._page.on("response", self._response_callback) + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(page={self._page!r}, " + f"attempts={self._attempts!r}, " + f"capsolver_api_key={self._capsolver_api_key!r})" + ) + + @staticmethod + @abstractmethod + def _get_task_object(recaptcha_box: RecaptchaBox) -> Optional[str]: + """ + Get the ID of the object in the reCAPTCHA image challenge task. + + Parameters + ---------- + recaptcha_box : RecaptchaBox + The reCAPTCHA box. + + Returns + ------- + Optional[str] + The object ID. Returns None if the task object is not recognized. + """ + + @abstractmethod + def _response_callback(self, response: Response) -> None: + """ + The callback for intercepting payload and userverify responses. + + Parameters + ---------- + response : Response + The response. + """ + + @abstractmethod + def _random_delay(self, short: bool = True) -> None: + """ + Delay the browser for a random amount of time. + + Parameters + ---------- + short : bool, optional + Whether to delay for a short amount of time, by default True. + """ + + @abstractmethod + def _get_capsolver_response( + self, recaptcha_box: RecaptchaBox, image_data: bytes + ) -> Optional[Dict[str, Any]]: + """ + Get the CapSolver JSON response for an image. + + Parameters + ---------- + recaptcha_box : RecaptchaBox + The reCAPTCHA box. + image_data : bytes + The image data. + + Returns + ------- + Optional[Dict[str, Any]] + The CapSolver JSON response. + Returns None if the task object is not recognized. + + Raises + ------ + CapSolverError + If the CapSolver API returned an error. + """ + + @abstractmethod + def _solve_tiles(self, recaptcha_box: RecaptchaBox, indexes: Iterable[int]) -> None: + """ + Solve the tiles in the reCAPTCHA image challenge. + + Parameters + ---------- + recaptcha_box : RecaptchaBox + The reCAPTCHA box. + indexes : Iterable[int] + The indexes of the tiles that contain the task object. + + Raises + ------ + CapSolverError + If the CapSolver API returned an error. + """ + + @abstractmethod + def _convert_audio_to_text(self, audio_url: str) -> Optional[str]: + """ + Convert the reCAPTCHA audio to text. + + Parameters + ---------- + audio_url : str + The reCAPTCHA audio URL. + + Returns + ------- + Optional[str] + The reCAPTCHA audio text. Returns None if the audio could not be converted. + """ + + @abstractmethod + def _click_checkbox(self, recaptcha_box: RecaptchaBox) -> None: + """ + Click the reCAPTCHA checkbox. + + Parameters + ---------- + recaptcha_box : RecaptchaBox + The reCAPTCHA box. + + Raises + ------ + RecaptchaRateLimitError + If the reCAPTCHA rate limit has been exceeded. + """ + + @abstractmethod + def _get_audio_url(self, recaptcha_box: RecaptchaBox) -> str: + """ + Get the reCAPTCHA audio URL. + + Parameters + ---------- + recaptcha_box : RecaptchaBox + The reCAPTCHA box. + + Returns + ------- + str + The reCAPTCHA audio URL. + + Raises + ------ + RecaptchaRateLimitError + If the reCAPTCHA rate limit has been exceeded. + """ + + @abstractmethod + def _submit_audio_text(self, recaptcha_box: RecaptchaBox, text: str) -> None: + """ + Submit the reCAPTCHA audio text. + + Parameters + ---------- + recaptcha_box : RecaptchaBox + The reCAPTCHA box. + text : str + The reCAPTCHA audio text. + + Raises + ------ + RecaptchaRateLimitError + If the reCAPTCHA rate limit has been exceeded. + """ + + @abstractmethod + def _submit_tile_answers(self, recaptcha_box: RecaptchaBox) -> None: + """ + Submit the reCAPTCHA image challenge tile answers. + + Parameters + ---------- + recaptcha_box : RecaptchaBox + The reCAPTCHA box. + + Raises + ------ + RecaptchaRateLimitError + If the reCAPTCHA rate limit has been exceeded. + """ + + @abstractmethod + def _solve_image_challenge(self, recaptcha_box: RecaptchaBox) -> None: + """ + Solve the reCAPTCHA image challenge. + + Parameters + ---------- + recaptcha_box : RecaptchaBox + The reCAPTCHA box. + + Raises + ------ + CapSolverError + If the CapSolver API returned an error. + RecaptchaRateLimitError + If the reCAPTCHA rate limit has been exceeded. + """ + + @abstractmethod + def _solve_audio_challenge(self, recaptcha_box: RecaptchaBox) -> None: + """ + Solve the reCAPTCHA audio challenge. + + Parameters + ---------- + recaptcha_box : RecaptchaBox + The reCAPTCHA box. + + Raises + ------ + RecaptchaRateLimitError + If the reCAPTCHA rate limit has been exceeded. + """ + + def close(self) -> None: + """Remove the response listener.""" + try: + self._page.remove_listener("response", self._response_callback) + except KeyError: + pass + + @abstractmethod + def recaptcha_is_visible(self) -> bool: + """ + Check if a reCAPTCHA challenge or unchecked reCAPTCHA box is visible. + + Returns + ------- + bool + Whether a reCAPTCHA challenge or unchecked reCAPTCHA box is visible. + """ + + @abstractmethod + def solve_recaptcha( + self, + *, + attempts: Optional[int] = None, + wait: bool = False, + wait_timeout: float = 30, + image_challenge: bool = False, + ) -> str: + """ + Solve the reCAPTCHA and return the `g-recaptcha-response` token. + + Parameters + ---------- + attempts : Optional[int], optional + The number of solve attempts, by default 5. + wait : bool, optional + Whether to wait for the reCAPTCHA to appear, by default False. + wait_timeout : float, optional + The amount of time in seconds to wait for the reCAPTCHA to appear, + by default 30. Only used if `wait` is True. + image_challenge : bool, optional + Whether to solve the image challenge, by default False. + + Returns + ------- + str + The `g-recaptcha-response` token. + + Raises + ------ + CapSolverError + If the CapSolver API returned an error. + RecaptchaNotFoundError + If the reCAPTCHA was not found. + RecaptchaRateLimitError + If the reCAPTCHA rate limit has been exceeded. + RecaptchaSolveError + If the reCAPTCHA could not be solved. + """ diff --git a/playwright_recaptcha/recaptchav2/recaptcha_box.py b/playwright_recaptcha/recaptchav2/recaptcha_box.py index a9d6f9a..2a1361e 100644 --- a/playwright_recaptcha/recaptchav2/recaptcha_box.py +++ b/playwright_recaptcha/recaptchav2/recaptcha_box.py @@ -4,7 +4,7 @@ import re from abc import ABC, abstractmethod from functools import wraps -from typing import Iterable, List, Tuple, Union +from typing import Generic, Iterable, List, Tuple, TypeVar, Union from playwright.async_api import Frame as AsyncFrame from playwright.async_api import Locator as AsyncLocator @@ -13,28 +13,47 @@ from ..errors import RecaptchaNotFoundError +FrameT = TypeVar("FrameT", AsyncFrame, SyncFrame) Locator = Union[AsyncLocator, SyncLocator] -Frame = Union[AsyncFrame, SyncFrame] -class RecaptchaBox(ABC): - """The base class for reCAPTCHA v2 boxes.""" +class RecaptchaBox(ABC, Generic[FrameT]): + """ + The base class for reCAPTCHA v2 boxes. + + Parameters + ---------- + anchor_frame : FrameT + The reCAPTCHA anchor frame. + bframe_frame : FrameT + The reCAPTCHA bframe frame. + """ + + def __init__(self, anchor_frame: FrameT, bframe_frame: FrameT) -> None: + self._anchor_frame = anchor_frame + self._bframe_frame = bframe_frame + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(anchor_frame={self._anchor_frame!r}, " + f"bframe_frame={self._bframe_frame!r})" + ) @staticmethod def _get_recaptcha_frame_pairs( - frames: Iterable[Frame], - ) -> List[Tuple[Frame, Frame]]: + frames: Iterable[FrameT], + ) -> List[Tuple[FrameT, FrameT]]: """ Get the reCAPTCHA anchor and bframe frame pairs. Parameters ---------- - frames : Iterable[Frame] + frames : Iterable[FrameT] A list of frames to search for the reCAPTCHA anchor and bframe frames. Returns ------- - List[Tuple[Frame, Frame]] + List[Tuple[FrameT, FrameT]] A list of reCAPTCHA anchor and bframe frame pairs. Raises @@ -177,23 +196,23 @@ def frames_are_detached(self) -> bool: @property @abstractmethod - def anchor_frame(self) -> Frame: + def anchor_frame(self) -> FrameT: """The reCAPTCHA anchor frame.""" @property @abstractmethod - def bframe_frame(self) -> Frame: + def bframe_frame(self) -> FrameT: """The reCAPTCHA bframe frame.""" @classmethod @abstractmethod - def from_frames(cls, frames: Iterable[Frame]) -> RecaptchaBox: + def from_frames(cls, frames: Iterable[FrameT]) -> RecaptchaBox: """ Create a reCAPTCHA box using a list of frames. Parameters ---------- - frames : Iterable[Frame] + frames : Iterable[FrameT] A list of frames to search for the reCAPTCHA frames. Returns @@ -298,7 +317,7 @@ def challenge_is_solved(self) -> bool: """ -class SyncRecaptchaBox(RecaptchaBox): +class SyncRecaptchaBox(RecaptchaBox[SyncFrame]): """ The synchronous class for reCAPTCHA v2 boxes. @@ -310,16 +329,6 @@ class SyncRecaptchaBox(RecaptchaBox): The reCAPTCHA bframe frame. """ - def __init__(self, anchor_frame: SyncFrame, bframe_frame: SyncFrame) -> None: - self._anchor_frame = anchor_frame - self._bframe_frame = bframe_frame - - def __repr__(self) -> str: - return ( - f"SyncRecaptchaBox(anchor_frame={self._anchor_frame!r}, " - f"bframe_frame={self._bframe_frame!r})" - ) - @classmethod def from_frames(cls, frames: Iterable[SyncFrame]) -> SyncRecaptchaBox: """ @@ -476,7 +485,7 @@ def challenge_is_solved(self) -> bool: return self.checkbox.is_visible() and self.checkbox.is_checked() -class AsyncRecaptchaBox(RecaptchaBox): +class AsyncRecaptchaBox(RecaptchaBox[AsyncFrame]): """ The asynchronous class for reCAPTCHA v2 boxes. @@ -488,16 +497,6 @@ class AsyncRecaptchaBox(RecaptchaBox): The reCAPTCHA bframe frame. """ - def __init__(self, anchor_frame: AsyncFrame, bframe_frame: AsyncFrame) -> None: - self._anchor_frame = anchor_frame - self._bframe_frame = bframe_frame - - def __repr__(self) -> str: - return ( - f"AsyncRecaptchaBox(anchor_frame={self._anchor_frame!r}, " - f"bframe_frame={self._bframe_frame!r})" - ) - @classmethod async def from_frames(cls, frames: Iterable[AsyncFrame]) -> AsyncRecaptchaBox: """ diff --git a/playwright_recaptcha/recaptchav2/sync_solver.py b/playwright_recaptcha/recaptchav2/sync_solver.py index 06053b1..cf72e2f 100644 --- a/playwright_recaptcha/recaptchav2/sync_solver.py +++ b/playwright_recaptcha/recaptchav2/sync_solver.py @@ -1,15 +1,14 @@ from __future__ import annotations import base64 -import os import random import re from io import BytesIO from json import JSONDecodeError -from typing import Any, Dict, Iterable, List, Optional, Union +from typing import Any, Dict, Iterable, List, Optional import speech_recognition -from playwright.sync_api import APIResponse, Locator, Page, Response +from playwright.sync_api import Locator, Page, Response from pydub import AudioSegment from tenacity import Retrying, retry_if_exception_type, stop_after_delay, wait_fixed @@ -19,10 +18,11 @@ RecaptchaRateLimitError, RecaptchaSolveError, ) +from .base_solver import BaseSolver from .recaptcha_box import SyncRecaptchaBox -class SyncSolver: +class SyncSolver(BaseSolver[Page]): """ A class for solving reCAPTCHA v2 synchronously with Playwright. @@ -37,24 +37,6 @@ class SyncSolver: If None, the `CAPSOLVER_API_KEY` environment variable will be used. """ - def __init__( - self, page: Page, *, attempts: int = 5, capsolver_api_key: Optional[str] = None - ) -> None: - self._page = page - self._attempts = attempts - self._capsolver_api_key = capsolver_api_key or os.getenv("CAPSOLVER_API_KEY") - - self._token: Optional[str] = None - self._payload_response: Union[APIResponse, Response, None] = None - self._page.on("response", self._response_callback) - - def __repr__(self) -> str: - return ( - f"SyncSolver(page={self._page!r}, " - f"attempts={self._attempts!r}, " - f"capsolver_api_key={self._capsolver_api_key!r})" - ) - def __enter__(self) -> SyncSolver: return self @@ -483,13 +465,6 @@ def _solve_audio_challenge(self, recaptcha_box: SyncRecaptchaBox) -> None: self._submit_audio_text(recaptcha_box, text) - def close(self) -> None: - """Remove the response listener.""" - try: - self._page.remove_listener("response", self._response_callback) - except KeyError: - pass - def recaptcha_is_visible(self) -> bool: """ Check if a reCAPTCHA challenge or unchecked reCAPTCHA box is visible. diff --git a/playwright_recaptcha/recaptchav3/async_solver.py b/playwright_recaptcha/recaptchav3/async_solver.py index c6cc176..e097bb4 100644 --- a/playwright_recaptcha/recaptchav3/async_solver.py +++ b/playwright_recaptcha/recaptchav3/async_solver.py @@ -7,9 +7,10 @@ from playwright.async_api import Page, Response from ..errors import RecaptchaTimeoutError +from .base_solver import BaseSolver -class AsyncSolver: +class AsyncSolver(BaseSolver[Page]): """ A class for solving reCAPTCHA v3 asynchronously with Playwright. @@ -21,16 +22,6 @@ class AsyncSolver: The solve timeout in seconds, by default 30. """ - def __init__(self, page: Page, timeout: float = 30) -> None: - self._page = page - self._timeout = timeout - - self._token: Optional[str] = None - self._page.on("response", self._response_callback) - - def __repr__(self) -> str: - return f"AsyncSolver(page={self._page!r}, timeout={self._timeout!r})" - async def __aenter__(self) -> AsyncSolver: return self @@ -54,13 +45,6 @@ async def _response_callback(self, response: Response) -> None: if token_match is not None: self._token = token_match.group(1) - def close(self) -> None: - """Remove the reload response listener.""" - try: - self._page.remove_listener("response", self._response_callback) - except KeyError: - pass - async def solve_recaptcha(self, timeout: Optional[float] = None) -> str: """ Wait for the reCAPTCHA to be solved and return the `g-recaptcha-response` token. diff --git a/playwright_recaptcha/recaptchav3/base_solver.py b/playwright_recaptcha/recaptchav3/base_solver.py new file mode 100644 index 0000000..e45861b --- /dev/null +++ b/playwright_recaptcha/recaptchav3/base_solver.py @@ -0,0 +1,75 @@ +from abc import ABC, abstractmethod +from typing import Generic, Optional, TypeVar, Union + +from playwright.async_api import Page as AsyncPage +from playwright.async_api import Response as AsyncResponse +from playwright.sync_api import Page as SyncPage +from playwright.sync_api import Response as SyncResponse + +PageT = TypeVar("PageT", AsyncPage, SyncPage) +Response = Union[AsyncResponse, SyncResponse] + + +class BaseSolver(ABC, Generic[PageT]): + """ + The base class for reCAPTCHA v3 solvers. + + Parameters + ---------- + page : PageT + The Playwright page to solve the reCAPTCHA on. + timeout : float, optional + The solve timeout in seconds, by default 30. + """ + + def __init__(self, page: PageT, timeout: float = 30) -> None: + self._page = page + self._timeout = timeout + + self._token: Optional[str] = None + self._page.on("response", self._response_callback) + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(page={self._page!r}, " + f"timeout={self._timeout!r})" + ) + + @abstractmethod + def _response_callback(self, response: Response) -> None: + """ + The callback for intercepting reload responses. + + Parameters + ---------- + response : Response + The response. + """ + + def close(self) -> None: + """Remove the reload response listener.""" + try: + self._page.remove_listener("response", self._response_callback) + except KeyError: + pass + + @abstractmethod + def solve_recaptcha(self, timeout: Optional[float] = None) -> str: + """ + Wait for the reCAPTCHA to be solved and return the `g-recaptcha-response` token. + + Parameters + ---------- + timeout : Optional[float], optional + The solve timeout in seconds, by default 30. + + Returns + ------- + str + The `g-recaptcha-response` token. + + Raises + ------ + RecaptchaTimeoutError + If the solve timeout has been exceeded. + """ diff --git a/playwright_recaptcha/recaptchav3/sync_solver.py b/playwright_recaptcha/recaptchav3/sync_solver.py index b4f5ec7..efc1ea3 100644 --- a/playwright_recaptcha/recaptchav3/sync_solver.py +++ b/playwright_recaptcha/recaptchav3/sync_solver.py @@ -7,9 +7,10 @@ from playwright.sync_api import Page, Response from ..errors import RecaptchaTimeoutError +from .base_solver import BaseSolver -class SyncSolver: +class SyncSolver(BaseSolver[Page]): """ A class for solving reCAPTCHA v3 synchronously with Playwright. @@ -21,16 +22,6 @@ class SyncSolver: The solve timeout in seconds, by default 30. """ - def __init__(self, page: Page, timeout: float = 30) -> None: - self._page = page - self._timeout = timeout - - self._token: Optional[str] = None - self._page.on("response", self._response_callback) - - def __repr__(self) -> str: - return f"SyncSolver(page={self._page!r}, timeout={self._timeout!r})" - def __enter__(self) -> SyncSolver: return self @@ -54,13 +45,6 @@ def _response_callback(self, response: Response) -> None: if token_match is not None: self._token = token_match.group(1) - def close(self) -> None: - """Remove the reload response listener.""" - try: - self._page.remove_listener("response", self._response_callback) - except KeyError: - pass - def solve_recaptcha(self, timeout: Optional[float] = None) -> str: """ Wait for the reCAPTCHA to be solved and return the `g-recaptcha-response` token.