Skip to content

Commit

Permalink
Merge pull request #100 from Xewdy444/new-translations
Browse files Browse the repository at this point in the history
Add new translations
  • Loading branch information
Xewdy444 authored Jun 3, 2024
2 parents 94d4eec + 9679b9a commit 67f2b36
Show file tree
Hide file tree
Showing 6 changed files with 592 additions and 211 deletions.
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ All solvers return the `g-recaptcha-response` token, which is required for form
## Installation
pip install playwright-recaptcha

This library requires FFmpeg to be installed on your system in order to convert the audio challenge from reCAPTCHA v2 into text.
This library requires FFmpeg to be installed on your system for the transcription of reCAPTCHA v2 audio challenges.

| OS | Command |
| :-----: | :--------------------: |
Expand All @@ -48,6 +48,19 @@ You can also download the latest static build from [here](https://ffmpeg.org/dow
> **Note**
> Make sure to have the ffmpeg and ffprobe binaries in your system's PATH so that pydub can find them.
## Supported Languages
- Chinese (zh-CN)
- Dutch (nl)
- English (en)
- French (fr)
- German (de)
- Italian (it)
- Portuguese (pt)
- Russian (ru)
- Spanish (es)

If you would like to request support for a new language, please open an issue. You can also open a pull request if you would like to contribute.

## reCAPTCHA v2 Example
For more reCAPTCHA v2 examples, see the [examples folder](https://github.com/Xewdy444/Playwright-reCAPTCHA/tree/main/examples/recaptchav2).

Expand All @@ -65,7 +78,7 @@ with sync_playwright() as playwright:
print(token)
```

If you would like to solve the image challenge, you can set the `CAPSOLVER_API_KEY` environment variable to your [CapSolver](https://www.capsolver.com/?utm_source=github&utm_medium=banner_github&utm_campaign=Playwright-reCAPTCHA) API key. You can also pass the API key as an argument to `recaptchav2.SyncSolver()` with `capsolver_api_key="your_api_key"`. Then, set `image_challenge=True` in `solver.solve_recaptcha()`.
By default, the audio challenge will be solved. If you would like to solve the image challenge, you can set the `CAPSOLVER_API_KEY` environment variable to your [CapSolver](https://www.capsolver.com/?utm_source=github&utm_medium=banner_github&utm_campaign=Playwright-reCAPTCHA) API key. You can also pass the API key as an argument to `recaptchav2.SyncSolver()` with `capsolver_api_key="your_api_key"`. Then, set `image_challenge=True` in `solver.solve_recaptcha()`.

```python
with recaptchav2.SyncSolver(page, capsolver_api_key="your_api_key") as solver:
Expand Down
159 changes: 87 additions & 72 deletions playwright_recaptcha/recaptchav2/async_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@
import asyncio
import base64
import functools
import random
import re
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from io import BytesIO
from json import JSONDecodeError
from typing import Any, BinaryIO, Dict, List, Optional, Union
from urllib.parse import parse_qs, urlparse

import speech_recognition
from playwright.async_api import Locator, Page, Response
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
from tenacity import (
AsyncRetrying,
retry_if_exception_type,
Expand All @@ -28,7 +30,7 @@
)
from .base_solver import BaseSolver
from .recaptcha_box import AsyncRecaptchaBox
from .translations import TRANSLATIONS
from .translations import OBJECT_TRANSLATIONS, ORIGINAL_LANGUAGE_AUDIO


class AsyncAudioFile(speech_recognition.AudioFile):
Expand Down Expand Up @@ -98,28 +100,29 @@ async def _get_task_object(recaptcha_box: AsyncRecaptchaBox) -> Optional[str]:
The object ID. Returns None if the task object is not recognized.
"""
object_dict = {
"/m/0pg52": TRANSLATIONS["taxis"],
"/m/01bjv": TRANSLATIONS["bus"],
"/m/04_sv": TRANSLATIONS["motorcycles"],
"/m/013xlm": TRANSLATIONS["tractors"],
"/m/01jk_4": TRANSLATIONS["chimneys"],
"/m/014xcs": TRANSLATIONS["crosswalks"],
"/m/015qff": TRANSLATIONS["traffic_lights"],
"/m/0199g": TRANSLATIONS["bicycles"],
"/m/015qbp": TRANSLATIONS["parking_meters"],
"/m/0k4j": TRANSLATIONS["cars"],
"/m/015kr": TRANSLATIONS["bridges"],
"/m/019jd": TRANSLATIONS["boats"],
"/m/0cdl1": TRANSLATIONS["palm_trees"],
"/m/09d_r": TRANSLATIONS["mountains_or_hills"],
"/m/01pns0": TRANSLATIONS["fire_hydrant"],
"/m/01lynh": TRANSLATIONS["stairs"],
"/m/0pg52": OBJECT_TRANSLATIONS["taxis"],
"/m/01bjv": OBJECT_TRANSLATIONS["bus"],
"/m/04_sv": OBJECT_TRANSLATIONS["motorcycles"],
"/m/013xlm": OBJECT_TRANSLATIONS["tractors"],
"/m/01jk_4": OBJECT_TRANSLATIONS["chimneys"],
"/m/014xcs": OBJECT_TRANSLATIONS["crosswalks"],
"/m/015qff": OBJECT_TRANSLATIONS["traffic_lights"],
"/m/0199g": OBJECT_TRANSLATIONS["bicycles"],
"/m/015qbp": OBJECT_TRANSLATIONS["parking_meters"],
"/m/0k4j": OBJECT_TRANSLATIONS["cars"],
"/m/015kr": OBJECT_TRANSLATIONS["bridges"],
"/m/019jd": OBJECT_TRANSLATIONS["boats"],
"/m/0cdl1": OBJECT_TRANSLATIONS["palm_trees"],
"/m/09d_r": OBJECT_TRANSLATIONS["mountains_or_hills"],
"/m/01pns0": OBJECT_TRANSLATIONS["fire_hydrant"],
"/m/01lynh": OBJECT_TRANSLATIONS["stairs"],
}

task = await recaptcha_box.bframe_frame.locator("div").all_inner_texts()
object_ = task[0].split("\n")[1]

for object_id, translations in object_dict.items():
if any(translation in task[0] for translation in translations):
if object_ in translations:
return object_id

return None
Expand Down Expand Up @@ -147,18 +150,6 @@ async def _response_callback(self, response: Response) -> None:
if token_match is not None:
self._token = token_match.group(1)

async def _random_delay(self, short: bool = True) -> None:
"""
Delay the browser for a random amount of time.
Parameters
----------
short : bool, optional
Whether to delay for a short amount of time, by default True.
"""
delay_time = random.randint(150, 350) if short else random.randint(1250, 1500)
await self._page.wait_for_timeout(delay_time)

async def _get_capsolver_response(
self, recaptcha_box: AsyncRecaptchaBox, image_data: bytes
) -> Optional[Dict[str, Any]]:
Expand Down Expand Up @@ -230,29 +221,38 @@ async def _solve_tiles(
CapSolverError
If the CapSolver API returned an error.
"""
changing_tiles: List[Locator] = []
changing_tiles: Dict[Locator, str] = {}
indexes = indexes.copy()
random.shuffle(indexes)

style_script = """
(element) => {
element.style = "";
element.className = "rc-imageselect-tile";
}
"""

for index in indexes:
tile = recaptcha_box.tile_selector.nth(index)
await tile.click()

if "rc-imageselect-dynamic-selected" in await tile.get_attribute("class"):
changing_tiles.append(tile)
if "rc-imageselect-dynamic-selected" not in await tile.get_attribute(
"class"
):
continue

await self._random_delay()
changing_tiles[tile] = await tile.locator("img").get_attribute("src")
await tile.evaluate(style_script)

while changing_tiles:
random.shuffle(changing_tiles)
start_time = datetime.now()

while changing_tiles and (datetime.now() - start_time).seconds < 60:
for tile in changing_tiles.copy():
if "rc-imageselect-dynamic-selected" in await tile.get_attribute(
"class"
):
image_url = await tile.locator("img").get_attribute("src")

if changing_tiles[tile] == image_url:
continue

image_url = await tile.locator("img").get_attribute("src")
changing_tiles[tile] = image_url
response = await self._page.request.get(image_url)

capsolver_response = await self._get_capsolver_response(
Expand All @@ -263,33 +263,43 @@ async def _solve_tiles(
capsolver_response is None
or not capsolver_response["solution"]["hasObject"]
):
changing_tiles.remove(tile)
else:
await tile.click()
changing_tiles.pop(tile)
continue

async def _convert_audio_to_text(self, audio_url: str) -> Optional[str]:
await tile.click()
await tile.evaluate(style_script)

async def _transcribe_audio(
self, audio_url: str, *, language: str = "en-US"
) -> Optional[str]:
"""
Convert the reCAPTCHA audio to text.
Transcribe the reCAPTCHA audio challenge.
Parameters
----------
audio_url : str
The reCAPTCHA audio URL.
language : str, optional
The language of the audio, by default en-US.
Returns
-------
Optional[str]
The reCAPTCHA audio text. Returns None if the audio could not be converted.
The reCAPTCHA audio text.
Returns None if the audio could not be decoded or transcribed.
"""
loop = asyncio.get_event_loop()
response = await self._page.request.get(audio_url)

wav_audio = BytesIO()
mp3_audio = BytesIO(await response.body())

audio: AudioSegment = await loop.run_in_executor(
None, AudioSegment.from_mp3, mp3_audio
)
try:
audio: AudioSegment = await loop.run_in_executor(
None, AudioSegment.from_mp3, mp3_audio
)
except CouldntDecodeError:
return None

await loop.run_in_executor(
None, functools.partial(audio.export, wav_audio, format="wav")
Expand All @@ -302,7 +312,10 @@ async def _convert_audio_to_text(self, audio_url: str) -> Optional[str]:

try:
return await loop.run_in_executor(
None, recognizer.recognize_google, audio_data
None,
functools.partial(
recognizer.recognize_google, audio_data, language=language
),
)
except speech_recognition.UnknownValueError:
return None
Expand Down Expand Up @@ -457,8 +470,6 @@ async def _solve_image_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None
If the reCAPTCHA rate limit has been exceeded.
"""
while recaptcha_box.frames_are_attached():
await self._random_delay()

capsolver_response = await self._get_capsolver_response(
recaptcha_box, await self._payload_response.body()
)
Expand All @@ -470,33 +481,35 @@ async def _solve_image_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None
self._payload_response = None

async with self._page.expect_response(
re.compile("/recaptcha/(api2|enterprise)/payload")
re.compile("/recaptcha/(api2|enterprise)/reload")
) as response:
await recaptcha_box.new_challenge_button.click()

await response.value

while self._payload_response is None:
if await recaptcha_box.rate_limit_is_visible():
raise RecaptchaRateLimitError

await self._page.wait_for_timeout(250)

continue

await self._solve_tiles(
recaptcha_box, capsolver_response["solution"]["objects"]
)

await self._random_delay()

self._payload_response = None
button = recaptcha_box.skip_button.or_(recaptcha_box.next_button)

if await button.is_visible():
async with self._page.expect_response(
re.compile("/recaptcha/(api2|enterprise)/payload")
) as response:
await recaptcha_box.new_challenge_button.click()

await response.value
continue
if await button.is_hidden():
await self._submit_tile_answers(recaptcha_box)
return

await self._submit_tile_answers(recaptcha_box)
return
async with self._page.expect_response(
re.compile("/recaptcha/(api2|enterprise)/payload")
):
await button.click()

async def _solve_audio_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None:
"""
Expand All @@ -512,11 +525,16 @@ async def _solve_audio_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None
RecaptchaRateLimitError
If the reCAPTCHA rate limit has been exceeded.
"""
await self._random_delay(short=False)
parsed_url = urlparse(recaptcha_box.anchor_frame.url)
query_params = parse_qs(parsed_url.query)
language = query_params["hl"][0]

if language not in ORIGINAL_LANGUAGE_AUDIO:
language = "en-US"

while True:
url = await self._get_audio_url(recaptcha_box)
text = await self._convert_audio_to_text(url)
text = await self._transcribe_audio(url, language=language)

if text is not None:
break
Expand Down Expand Up @@ -651,9 +669,6 @@ async def solve_recaptcha(

return self._token

if not image_challenge:
await recaptcha_box.new_challenge_button.click()

attempts -= 1

raise RecaptchaSolveError
20 changes: 6 additions & 14 deletions playwright_recaptcha/recaptchav2/base_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,6 @@ def _response_callback(self, response: Response) -> None:
The response.
"""

@abstractmethod
def _random_delay(self, short: bool = True) -> None:
"""
Delay the browser for a random amount of time.
Parameters
----------
short : bool, optional
Whether to delay for a short amount of time, by default True.
"""

@abstractmethod
def _get_capsolver_response(
self, recaptcha_box: RecaptchaBox, image_data: bytes
Expand Down Expand Up @@ -140,19 +129,22 @@ def _solve_tiles(self, recaptcha_box: RecaptchaBox, indexes: Iterable[int]) -> N
"""

@abstractmethod
def _convert_audio_to_text(self, audio_url: str) -> Optional[str]:
def _transcribe_audio(self, audio_url: str, *, language: str) -> Optional[str]:
"""
Convert the reCAPTCHA audio to text.
Transcribe the reCAPTCHA audio challenge.
Parameters
----------
audio_url : str
The reCAPTCHA audio URL.
language : str
The language of the audio.
Returns
-------
Optional[str]
The reCAPTCHA audio text. Returns None if the audio could not be converted.
The reCAPTCHA audio text.
Returns None if the audio could not be transcribed.
"""

@abstractmethod
Expand Down
Loading

0 comments on commit 67f2b36

Please sign in to comment.