Skip to content

Commit

Permalink
Added base solver classes + Made RecaptchaBox generic
Browse files Browse the repository at this point in the history
  • Loading branch information
Xewdy444 committed Feb 18, 2024
1 parent 7e88f3d commit 225d7a7
Show file tree
Hide file tree
Showing 7 changed files with 432 additions and 127 deletions.
31 changes: 3 additions & 28 deletions playwright_recaptcha/recaptchav2/async_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import asyncio
import base64
import functools
import os
import random
import re
from concurrent.futures import ThreadPoolExecutor
Expand All @@ -12,7 +11,7 @@
from typing import Any, BinaryIO, Dict, Iterable, List, Optional, Union

import speech_recognition
from playwright.async_api import APIResponse, Locator, Page, Response
from playwright.async_api import Locator, Page, Response
from pydub import AudioSegment
from tenacity import (
AsyncRetrying,
Expand All @@ -27,6 +26,7 @@
RecaptchaRateLimitError,
RecaptchaSolveError,
)
from .base_solver import BaseSolver
from .recaptcha_box import AsyncRecaptchaBox


Expand Down Expand Up @@ -60,7 +60,7 @@ async def __aexit__(self, *args: Any) -> None:
await self._loop.run_in_executor(self._executor, self.__exit__, *args)


class AsyncSolver:
class AsyncSolver(BaseSolver[Page]):
"""
A class for solving reCAPTCHA v2 asynchronously with Playwright.
Expand All @@ -75,24 +75,6 @@ class AsyncSolver:
If None, the `CAPSOLVER_API_KEY` environment variable will be used.
"""

def __init__(
self, page: Page, *, attempts: int = 5, capsolver_api_key: Optional[str] = None
) -> None:
self._page = page
self._attempts = attempts
self._capsolver_api_key = capsolver_api_key or os.getenv("CAPSOLVER_API_KEY")

self._token: Optional[str] = None
self._payload_response: Union[APIResponse, Response, None] = None
self._page.on("response", self._response_callback)

def __repr__(self) -> str:
return (
f"AsyncSolver(page={self._page!r}, "
f"attempts={self._attempts!r}, "
f"capsolver_api_key={self._capsolver_api_key!r})"
)

async def __aenter__(self) -> AsyncSolver:
return self

Expand Down Expand Up @@ -547,13 +529,6 @@ async def _solve_audio_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None

await self._submit_audio_text(recaptcha_box, text)

def close(self) -> None:
"""Remove the response listener."""
try:
self._page.remove_listener("response", self._response_callback)
except KeyError:
pass

async def recaptcha_is_visible(self) -> bool:
"""
Check if a reCAPTCHA challenge or unchecked reCAPTCHA box is visible.
Expand Down
313 changes: 313 additions & 0 deletions playwright_recaptcha/recaptchav2/base_solver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@
import os
from abc import ABC, abstractmethod
from typing import Any, Dict, Generic, Iterable, Optional, TypeVar, Union

from playwright.async_api import APIResponse as AsyncAPIResponse
from playwright.async_api import Page as AsyncPage
from playwright.async_api import Response as AsyncResponse
from playwright.sync_api import APIResponse as SyncAPIResponse
from playwright.sync_api import Page as SyncPage
from playwright.sync_api import Response as SyncResponse

from .recaptcha_box import RecaptchaBox

# Constrained type variable: a solver is parameterised by exactly one of the
# Playwright page flavours (async or sync), never a mix of the two.
PageT = TypeVar("PageT", AsyncPage, SyncPage)
# Flavour-agnostic aliases so the base class can annotate responses without
# knowing whether the concrete solver uses the async or the sync Playwright API.
APIResponse = Union[AsyncAPIResponse, SyncAPIResponse]
Response = Union[AsyncResponse, SyncResponse]


class BaseSolver(ABC, Generic[PageT]):
    """
    Abstract foundation shared by the reCAPTCHA v2 solvers.

    The class stores the solver configuration, subscribes the subclass'
    ``_response_callback`` to the page's ``"response"`` event and declares
    the hooks every concrete solver has to implement.

    Parameters
    ----------
    page : PageT
        The Playwright page on which the reCAPTCHA is solved.
    attempts : int, optional
        How many solve attempts to make, by default 5.
    capsolver_api_key : Optional[str], optional
        The CapSolver API key, by default None.
        When no key is given, the `CAPSOLVER_API_KEY` environment variable
        is used instead.
    """

    def __init__(
        self,
        page: PageT,
        *,
        attempts: int = 5,
        capsolver_api_key: Optional[str] = None,
    ) -> None:
        # A missing (or empty) key falls back to the environment variable.
        if not capsolver_api_key:
            capsolver_api_key = os.getenv("CAPSOLVER_API_KEY")

        self._page = page
        self._attempts = attempts
        self._capsolver_api_key = capsolver_api_key

        # State that starts out empty; nothing in this base class fills it in.
        self._token: Optional[str] = None
        self._payload_response: Union[APIResponse, Response, None] = None

        # Register last, once every attribute exists. `close()` undoes this.
        self._page.on("response", self._response_callback)

    def __repr__(self) -> str:
        # Use the runtime class name so subclasses are reported correctly.
        name = self.__class__.__name__
        fields = (
            f"page={self._page!r}",
            f"attempts={self._attempts!r}",
            f"capsolver_api_key={self._capsolver_api_key!r}",
        )
        return f"{name}({', '.join(fields)})"

    @staticmethod
    @abstractmethod
    def _get_task_object(recaptcha_box: RecaptchaBox) -> Optional[str]:
        """
        Look up the ID of the object named in the image challenge task.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Returns
        -------
        Optional[str]
            The object ID, or None when the task object is not recognized.
        """

    @abstractmethod
    def _response_callback(self, response: Response) -> None:
        """
        Handle a page response in order to intercept payload and userverify
        responses.

        Parameters
        ----------
        response : Response
            The response.
        """

    @abstractmethod
    def _random_delay(self, short: bool = True) -> None:
        """
        Pause the browser for a random amount of time.

        Parameters
        ----------
        short : bool, optional
            Whether the pause should be a short one, by default True.
        """

    @abstractmethod
    def _get_capsolver_response(
        self, recaptcha_box: RecaptchaBox, image_data: bytes
    ) -> Optional[Dict[str, Any]]:
        """
        Fetch the CapSolver JSON response for an image.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.
        image_data : bytes
            The image data.

        Returns
        -------
        Optional[Dict[str, Any]]
            The CapSolver JSON response, or None when the task object is
            not recognized.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        """

    @abstractmethod
    def _solve_tiles(
        self, recaptcha_box: RecaptchaBox, indexes: Iterable[int]
    ) -> None:
        """
        Solve the tiles of the reCAPTCHA image challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.
        indexes : Iterable[int]
            Indexes of the tiles that contain the task object.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        """

    @abstractmethod
    def _convert_audio_to_text(self, audio_url: str) -> Optional[str]:
        """
        Turn the reCAPTCHA audio into text.

        Parameters
        ----------
        audio_url : str
            The reCAPTCHA audio URL.

        Returns
        -------
        Optional[str]
            The reCAPTCHA audio text, or None when the audio could not be
            converted.
        """

    @abstractmethod
    def _click_checkbox(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Click the reCAPTCHA checkbox.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _get_audio_url(self, recaptcha_box: RecaptchaBox) -> str:
        """
        Retrieve the reCAPTCHA audio URL.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Returns
        -------
        str
            The reCAPTCHA audio URL.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _submit_audio_text(self, recaptcha_box: RecaptchaBox, text: str) -> None:
        """
        Submit the text of the reCAPTCHA audio challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.
        text : str
            The reCAPTCHA audio text.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _submit_tile_answers(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Submit the tile answers of the reCAPTCHA image challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _solve_image_challenge(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Work through the reCAPTCHA image challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _solve_audio_challenge(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Work through the reCAPTCHA audio challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    def close(self) -> None:
        """Detach the response listener registered by the constructor."""
        try:
            self._page.remove_listener("response", self._response_callback)
        except KeyError:
            # Deliberately ignored; presumably raised when the listener is
            # no longer registered (e.g. a second close) - confirm.
            return

    @abstractmethod
    def recaptcha_is_visible(self) -> bool:
        """
        Tell whether a reCAPTCHA challenge or unchecked reCAPTCHA box is
        visible.

        Returns
        -------
        bool
            Whether a reCAPTCHA challenge or unchecked reCAPTCHA box is
            visible.
        """

    @abstractmethod
    def solve_recaptcha(
        self,
        *,
        attempts: Optional[int] = None,
        wait: bool = False,
        wait_timeout: float = 30,
        image_challenge: bool = False,
    ) -> str:
        """
        Solve the reCAPTCHA and return the `g-recaptcha-response` token.

        Parameters
        ----------
        attempts : Optional[int], optional
            The number of solve attempts, by default None.
            NOTE(review): this text previously read "by default 5";
            presumably None falls back to the `attempts` value passed to
            the constructor - confirm against the concrete solvers.
        wait : bool, optional
            Whether to wait for the reCAPTCHA to appear, by default False.
        wait_timeout : float, optional
            How long, in seconds, to wait for the reCAPTCHA to appear,
            by default 30. Only used if `wait` is True.
        image_challenge : bool, optional
            Whether to solve the image challenge, by default False.

        Returns
        -------
        str
            The `g-recaptcha-response` token.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        RecaptchaNotFoundError
            If the reCAPTCHA was not found.
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        RecaptchaSolveError
            If the reCAPTCHA could not be solved.
        """
Loading

0 comments on commit 225d7a7

Please sign in to comment.