Merge pull request #100 from Xewdy444/new-translations

Add new translations
Xewdy444 · Jun 3, 2024 · 67f2b36
2 parents 94d4eec + 9679b9a
commit 67f2b36
Show file tree

Hide file tree

Showing 6 changed files with 592 additions and 211 deletions.
diff --git a/README.md b/README.md
@@ -35,7 +35,7 @@ All solvers return the `g-recaptcha-response` token, which is required for form
 ## Installation
  pip install playwright-recaptcha
 
-This library requires FFmpeg to be installed on your system in order to convert the audio challenge from reCAPTCHA v2 into text.
+This library requires FFmpeg to be installed on your system for the transcription of reCAPTCHA v2 audio challenges.
 
 | OS | Command |
 | :-----: | :--------------------: |
@@ -48,6 +48,19 @@ You can also download the latest static build from [here](https://ffmpeg.org/dow
 > **Note**
 > Make sure to have the ffmpeg and ffprobe binaries in your system's PATH so that pydub can find them.
 
+## Supported Languages
+- Chinese (zh-CN)
+- Dutch (nl)
+- English (en)
+- French (fr)
+- German (de)
+- Italian (it)
+- Portuguese (pt)
+- Russian (ru)
+- Spanish (es)
+
+If you would like to request support for a new language, please open an issue. You can also open a pull request if you would like to contribute.
+
 ## reCAPTCHA v2 Example
 For more reCAPTCHA v2 examples, see the [examples folder](https://github.com/Xewdy444/Playwright-reCAPTCHA/tree/main/examples/recaptchav2).
 
@@ -65,7 +78,7 @@ with sync_playwright() as playwright:
  print(token)
 ```
 
-If you would like to solve the image challenge, you can set the `CAPSOLVER_API_KEY` environment variable to your [CapSolver](https://www.capsolver.com/?utm_source=github&utm_medium=banner_github&utm_campaign=Playwright-reCAPTCHA) API key. You can also pass the API key as an argument to `recaptchav2.SyncSolver()` with `capsolver_api_key="your_api_key"`. Then, set `image_challenge=True` in `solver.solve_recaptcha()`.
+By default, the audio challenge will be solved. If you would like to solve the image challenge, you can set the `CAPSOLVER_API_KEY` environment variable to your [CapSolver](https://www.capsolver.com/?utm_source=github&utm_medium=banner_github&utm_campaign=Playwright-reCAPTCHA) API key. You can also pass the API key as an argument to `recaptchav2.SyncSolver()` with `capsolver_api_key="your_api_key"`. Then, set `image_challenge=True` in `solver.solve_recaptcha()`.
 
 ```python
 with recaptchav2.SyncSolver(page, capsolver_api_key="your_api_key") as solver:

diff --git a/playwright_recaptcha/recaptchav2/async_solver.py b/playwright_recaptcha/recaptchav2/async_solver.py
@@ -3,16 +3,18 @@
 import asyncio
 import base64
 import functools
-import random
 import re
 from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
 from io import BytesIO
 from json import JSONDecodeError
 from typing import Any, BinaryIO, Dict, List, Optional, Union
+from urllib.parse import parse_qs, urlparse
 
 import speech_recognition
 from playwright.async_api import Locator, Page, Response
 from pydub import AudioSegment
+from pydub.exceptions import CouldntDecodeError
 from tenacity import (
  AsyncRetrying,
  retry_if_exception_type,
@@ -28,7 +30,7 @@
 )
 from .base_solver import BaseSolver
 from .recaptcha_box import AsyncRecaptchaBox
-from .translations import TRANSLATIONS
+from .translations import OBJECT_TRANSLATIONS, ORIGINAL_LANGUAGE_AUDIO
 
 
 class AsyncAudioFile(speech_recognition.AudioFile):
@@ -98,28 +100,29 @@ async def _get_task_object(recaptcha_box: AsyncRecaptchaBox) -> Optional[str]:
  The object ID. Returns None if the task object is not recognized.
  """
  object_dict = {
- "/m/0pg52": TRANSLATIONS["taxis"],
- "/m/01bjv": TRANSLATIONS["bus"],
- "/m/04_sv": TRANSLATIONS["motorcycles"],
- "/m/013xlm": TRANSLATIONS["tractors"],
- "/m/01jk_4": TRANSLATIONS["chimneys"],
- "/m/014xcs": TRANSLATIONS["crosswalks"],
- "/m/015qff": TRANSLATIONS["traffic_lights"],
- "/m/0199g": TRANSLATIONS["bicycles"],
- "/m/015qbp": TRANSLATIONS["parking_meters"],
- "/m/0k4j": TRANSLATIONS["cars"],
- "/m/015kr": TRANSLATIONS["bridges"],
- "/m/019jd": TRANSLATIONS["boats"],
- "/m/0cdl1": TRANSLATIONS["palm_trees"],
- "/m/09d_r": TRANSLATIONS["mountains_or_hills"],
- "/m/01pns0": TRANSLATIONS["fire_hydrant"],
- "/m/01lynh": TRANSLATIONS["stairs"],
+ "/m/0pg52": OBJECT_TRANSLATIONS["taxis"],
+ "/m/01bjv": OBJECT_TRANSLATIONS["bus"],
+ "/m/04_sv": OBJECT_TRANSLATIONS["motorcycles"],
+ "/m/013xlm": OBJECT_TRANSLATIONS["tractors"],
+ "/m/01jk_4": OBJECT_TRANSLATIONS["chimneys"],
+ "/m/014xcs": OBJECT_TRANSLATIONS["crosswalks"],
+ "/m/015qff": OBJECT_TRANSLATIONS["traffic_lights"],
+ "/m/0199g": OBJECT_TRANSLATIONS["bicycles"],
+ "/m/015qbp": OBJECT_TRANSLATIONS["parking_meters"],
+ "/m/0k4j": OBJECT_TRANSLATIONS["cars"],
+ "/m/015kr": OBJECT_TRANSLATIONS["bridges"],
+ "/m/019jd": OBJECT_TRANSLATIONS["boats"],
+ "/m/0cdl1": OBJECT_TRANSLATIONS["palm_trees"],
+ "/m/09d_r": OBJECT_TRANSLATIONS["mountains_or_hills"],
+ "/m/01pns0": OBJECT_TRANSLATIONS["fire_hydrant"],
+ "/m/01lynh": OBJECT_TRANSLATIONS["stairs"],
  }
 
  task = await recaptcha_box.bframe_frame.locator("div").all_inner_texts()
+ object_ = task[0].split("\n")[1]
 
  for object_id, translations in object_dict.items():
- if any(translation in task[0] for translation in translations):
+ if object_ in translations:
  return object_id
 
  return None
@@ -147,18 +150,6 @@ async def _response_callback(self, response: Response) -> None:
  if token_match is not None:
  self._token = token_match.group(1)
 
- async def _random_delay(self, short: bool = True) -> None:
- """
- Delay the browser for a random amount of time.
-
- Parameters
- ----------
- short : bool, optional
- Whether to delay for a short amount of time, by default True.
- """
- delay_time = random.randint(150, 350) if short else random.randint(1250, 1500)
- await self._page.wait_for_timeout(delay_time)
-
  async def _get_capsolver_response(
  self, recaptcha_box: AsyncRecaptchaBox, image_data: bytes
  ) -> Optional[Dict[str, Any]]:
@@ -230,29 +221,38 @@ async def _solve_tiles(
  CapSolverError
  If the CapSolver API returned an error.
  """
- changing_tiles: List[Locator] = []
+ changing_tiles: Dict[Locator, str] = {}
  indexes = indexes.copy()
- random.shuffle(indexes)
+
+ style_script = """
+ (element) => {
+ element.style = "";
+ element.className = "rc-imageselect-tile";
+ }
+ """
 
  for index in indexes:
  tile = recaptcha_box.tile_selector.nth(index)
  await tile.click()
 
- if "rc-imageselect-dynamic-selected" in await tile.get_attribute("class"):
- changing_tiles.append(tile)
+ if "rc-imageselect-dynamic-selected" not in await tile.get_attribute(
+ "class"
+ ):
+ continue
 
- await self._random_delay()
+ changing_tiles[tile] = await tile.locator("img").get_attribute("src")
+ await tile.evaluate(style_script)
 
- while changing_tiles:
- random.shuffle(changing_tiles)
+ start_time = datetime.now()
 
+ while changing_tiles and (datetime.now() - start_time).seconds < 60:
  for tile in changing_tiles.copy():
- if "rc-imageselect-dynamic-selected" in await tile.get_attribute(
- "class"
- ):
+ image_url = await tile.locator("img").get_attribute("src")
+
+ if changing_tiles[tile] == image_url:
  continue
 
- image_url = await tile.locator("img").get_attribute("src")
+ changing_tiles[tile] = image_url
  response = await self._page.request.get(image_url)
 
  capsolver_response = await self._get_capsolver_response(
@@ -263,33 +263,43 @@ async def _solve_tiles(
  capsolver_response is None
  or not capsolver_response["solution"]["hasObject"]
  ):
- changing_tiles.remove(tile)
- else:
- await tile.click()
+ changing_tiles.pop(tile)
+ continue
 
- async def _convert_audio_to_text(self, audio_url: str) -> Optional[str]:
+ await tile.click()
+ await tile.evaluate(style_script)
+
+ async def _transcribe_audio(
+ self, audio_url: str, *, language: str = "en-US"
+ ) -> Optional[str]:
  """
- Convert the reCAPTCHA audio to text.
+ Transcribe the reCAPTCHA audio challenge.
 
  Parameters
  ----------
  audio_url : str
  The reCAPTCHA audio URL.
+ language : str, optional
+ The language of the audio, by default en-US.
 
  Returns
  -------
  Optional[str]
- The reCAPTCHA audio text. Returns None if the audio could not be converted.
+ The reCAPTCHA audio text.
+ Returns None if the audio could not be converted.
  """
  loop = asyncio.get_event_loop()
  response = await self._page.request.get(audio_url)
 
  wav_audio = BytesIO()
  mp3_audio = BytesIO(await response.body())
 
- audio: AudioSegment = await loop.run_in_executor(
- None, AudioSegment.from_mp3, mp3_audio
- )
+ try:
+ audio: AudioSegment = await loop.run_in_executor(
+ None, AudioSegment.from_mp3, mp3_audio
+ )
+ except CouldntDecodeError:
+ return None
 
  await loop.run_in_executor(
  None, functools.partial(audio.export, wav_audio, format="wav")
@@ -302,7 +312,10 @@ async def _convert_audio_to_text(self, audio_url: str) -> Optional[str]:
 
  try:
  return await loop.run_in_executor(
- None, recognizer.recognize_google, audio_data
+ None,
+ functools.partial(
+ recognizer.recognize_google, audio_data, language=language
+ ),
  )
  except speech_recognition.UnknownValueError:
  return None
@@ -457,8 +470,6 @@ async def _solve_image_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None
  If the reCAPTCHA rate limit has been exceeded.
  """
  while recaptcha_box.frames_are_attached():
- await self._random_delay()
-
  capsolver_response = await self._get_capsolver_response(
  recaptcha_box, await self._payload_response.body()
  )
@@ -470,33 +481,35 @@ async def _solve_image_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None
  self._payload_response = None
 
  async with self._page.expect_response(
- re.compile("/recaptcha/(api2|enterprise)/payload")
+ re.compile("/recaptcha/(api2|enterprise)/reload")
  ) as response:
  await recaptcha_box.new_challenge_button.click()
 
  await response.value
+
+ while self._payload_response is None:
+ if await recaptcha_box.rate_limit_is_visible():
+ raise RecaptchaRateLimitError
+
+ await self._page.wait_for_timeout(250)
+
  continue
 
  await self._solve_tiles(
  recaptcha_box, capsolver_response["solution"]["objects"]
  )
 
- await self._random_delay()
-
  self._payload_response = None
  button = recaptcha_box.skip_button.or_(recaptcha_box.next_button)
 
- if await button.is_visible():
- async with self._page.expect_response(
- re.compile("/recaptcha/(api2|enterprise)/payload")
- ) as response:
- await recaptcha_box.new_challenge_button.click()
-
- await response.value
- continue
+ if await button.is_hidden():
+ await self._submit_tile_answers(recaptcha_box)
+ return
 
- await self._submit_tile_answers(recaptcha_box)
- return
+ async with self._page.expect_response(
+ re.compile("/recaptcha/(api2|enterprise)/payload")
+ ):
+ await button.click()
 
  async def _solve_audio_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None:
  """
@@ -512,11 +525,16 @@ async def _solve_audio_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None
  RecaptchaRateLimitError
  If the reCAPTCHA rate limit has been exceeded.
  """
- await self._random_delay(short=False)
+ parsed_url = urlparse(recaptcha_box.anchor_frame.url)
+ query_params = parse_qs(parsed_url.query)
+ language = query_params["hl"][0]
+
+ if language not in ORIGINAL_LANGUAGE_AUDIO:
+ language = "en-US"
 
  while True:
  url = await self._get_audio_url(recaptcha_box)
- text = await self._convert_audio_to_text(url)
+ text = await self._transcribe_audio(url, language=language)
 
  if text is not None:
  break
@@ -651,9 +669,6 @@ async def solve_recaptcha(
 
  return self._token
 
- if not image_challenge:
- await recaptcha_box.new_challenge_button.click()
-
  attempts -= 1
 
  raise RecaptchaSolveError
diff --git a/playwright_recaptcha/recaptchav2/base_solver.py b/playwright_recaptcha/recaptchav2/base_solver.py
@@ -84,17 +84,6 @@ def _response_callback(self, response: Response) -> None:
  The response.
  """
 
- @abstractmethod
- def _random_delay(self, short: bool = True) -> None:
- """
- Delay the browser for a random amount of time.
-
- Parameters
- ----------
- short : bool, optional
- Whether to delay for a short amount of time, by default True.
- """
-
  @abstractmethod
  def _get_capsolver_response(
  self, recaptcha_box: RecaptchaBox, image_data: bytes
@@ -140,19 +129,22 @@ def _solve_tiles(self, recaptcha_box: RecaptchaBox, indexes: Iterable[int]) -> N
  """
 
  @abstractmethod
- def _convert_audio_to_text(self, audio_url: str) -> Optional[str]:
+ def _transcribe_audio(self, audio_url: str, *, language: str) -> Optional[str]:
  """
- Convert the reCAPTCHA audio to text.
+ Transcribe the reCAPTCHA audio challenge.
 
  Parameters
  ----------
  audio_url : str
  The reCAPTCHA audio URL.
+ language : str
+ The language of the audio.
 
  Returns
  -------
  Optional[str]
- The reCAPTCHA audio text. Returns None if the audio could not be converted.
+ The reCAPTCHA audio text.
+ Returns None if the audio could not be converted.
  """
 
  @abstractmethod