尝试修复验证码问题，尝试修复打包

djkcyl · Jun 14, 2023 · 6232721 · 6232721
1 parent 51a9fef
commit 6232721
Show file tree

Hide file tree

Showing 11 changed files with 439 additions and 405 deletions.
diff --git a/aunly_bbot/cli/api.py b/aunly_bbot/cli/api.py
@@ -39,7 +39,7 @@ async def init_playwright():
             "Mozilla/5.0 (Linux; Android 10; RMX1911) AppleWebKit/537.36 "
             "(KHTML, like Gecko) Chrome/100.0.4896.127 Mobile Safari/537.36"
         ),
-        headless=False,
+        # headless=False,
     )
     PLAYWRIGIT = ff
     logger.info("[Playwright] 正在获取浏览器版本")
@@ -49,7 +49,6 @@ async def init_playwright():
         page = await PLAYWRIGIT.new_page()
     version = await page.evaluate("navigator.appVersion")
     logger.info(f"[BiliBili推送] 浏览器启动完成，当前版本 {version}")
-    await PLAYWRIGIT.clear_cookies()
     logger.debug(await PLAYWRIGIT.cookies())
 
 

diff --git a/aunly_bbot/cli/config.py b/aunly_bbot/cli/config.py
@@ -288,7 +288,7 @@ def openai_api_token(self):
     def openai_model(self):
         openai_model = ListPrompt(
             "请选择 OpenAI 模型",
-            [Choice("gpt-3.5-turbo-0301"), Choice("gpt-4-0314"), Choice("gpt-4-32k-0314")],
+            [Choice("gpt-3.5-turbo-0613"), Choice("gpt-3.5-turbo-16k-0613"), Choice("gpt-4-0613")],
             allow_filter=False,
             annotation="使用键盘的 ↑ 和 ↓ 来选择, 按回车确认",
         ).prompt()

diff --git a/aunly_bbot/core/log.py b/aunly_bbot/core/log.py
@@ -82,6 +82,7 @@ def in_screen():
 )
 
 logger.success(f"成功重载 logger，当前日志等级为 {log_level}")
+logger.info(f"日志文件将会保存在 {LOGPATH} 中")
 
 # logger.trace("TRACE 等级将会输出至控制台")
 # logger.debug("DEBUG 等级将会输出至控制台")

diff --git a/aunly_bbot/model/config.py b/aunly_bbot/model/config.py
@@ -57,14 +57,15 @@ class _Bilibili(BaseModel, extra=Extra.ignore):
     dynamic_font_source: Literal["local", "remote"] = "local"
     openai_summarization: bool = False
     openai_api_token: Optional[str] = None
-    openai_model: str = "gpt-3.5-turbo"
+    openai_model: str = "gpt-3.5-turbo-0301"
     openai_proxy: Optional[AnyHttpUrl] = None
     openai_cooldown: int = 60
     openai_whitelist_users: Optional[list[int]] = None
     openai_promot_version: int = 2
     use_wordcloud: bool = False
     use_bcut_asr: bool = False
     asr_length_threshold: int = 60
+    captcha_address: Optional[AnyHttpUrl] = None
 
     # 验证是否可以登录
     @validator("use_login", always=True)

diff --git a/aunly_bbot/static/bot_config.exp.yaml b/aunly_bbot/static/bot_config.exp.yaml
@@ -30,6 +30,7 @@ Bilibili:
   use_wordcloud: true                     # 是否使用词云
   use_bcut_asr: true                      # 是否使用 BCut 接口进行 AI 语音识别
   asr_length_threshold: 60                # 调用语音识别的最小长度阈值（秒）
+  captcha_address: null                   # 验证码识别服务地址（HTTP 地址，需写明端口，例如 http://127.0.0.1:8660；为 null 时不启用自动验证码识别）
 Event:
   mute: true                              # 是否向管理员发送被禁言的事件提醒。
   permchange: true                        # 是否向管理员发送权限变更的事件提醒。

diff --git a/aunly_bbot/utils/browser_shot.py b/aunly_bbot/utils/browser_shot.py
@@ -11,12 +11,12 @@
 from sentry_sdk import capture_exception
 from playwright._impl._api_types import TimeoutError
 from graiax.playwright.interface import PlaywrightContext
+from playwright._impl._api_structures import Position
 from playwright.async_api._generated import (
     Page,
     Route,
     Request,
     Response,
-    Position,
     BrowserContext,
 )
 
@@ -153,89 +153,96 @@ async def captcha_result_callback(response: Response):
         captcha_resp = await response.text()
         logger.debug(f"[Captcha] Result: {captcha_resp}")
         if '"result": "success"' in captcha_resp:
-            logger.success("验证码验证成功")
+            logger.success("[Captcha] 验证码 Callback 验证成功")
             captcha_result = True
         elif '"result": "click"' in captcha_resp:
             pass
         else:
             if last_captcha_id:
-                logger.warning(f"验证码验证失败，正在上报：{last_captcha_id}")
+                logger.warning(f"[Captcha] 验证码 Callback 验证失败，正在上报：{last_captcha_id}")
                 async with httpx.AsyncClient() as client:
                     await client.post(
-                        "http://10.0.0.106:8660/captcha/select/report",
-                        json={"captcha_id": last_captcha_id},
+                        f"{captcha_baseurl}/report", json={"captcha_id": last_captcha_id}
                     )
                 last_captcha_id = ""
             captcha_result = False
 
     await page.set_viewport_size({"width": 460, "height": 720})
 
-    page.on(
-        "response",
-        lambda response: captcha_image_url_callback(response)
-        if response.url.startswith("https://static.geetest.com/captcha_v3/")
-        else None,
-    )
-    page.on(
-        "response",
-        lambda response: captcha_result_callback(response)
-        if response.url.startswith("https://api.geetest.com/ajax.php")
-        else None,
-    )
+    captcha_address = BotConfig.Bilibili.captcha_address
+    if captcha_address:
+        page.on(
+            "response",
+            lambda response: captcha_image_url_callback(response)
+            if response.url.startswith("https://static.geetest.com/captcha_v3/")
+            else None,
+        )
+        page.on(
+            "response",
+            lambda response: captcha_result_callback(response)
+            if response.url.startswith("https://api.geetest.com/ajax.php")
+            else None,
+        )
 
     with contextlib.suppress(TimeoutError):
         await page.goto(url, wait_until="networkidle", timeout=20000)
 
-    while captcha_image_body or captcha_result is False:
-        captcha_image = await page.query_selector(".geetest_item_img")
-        assert captcha_image
-        captcha_size = await captcha_image.bounding_box()
-        assert captcha_size
-        print(captcha_size)
-        origin_image_size = 344, 384
-
-        async with httpx.AsyncClient() as client:
-            captcha_req = await client.post(
-                "http://10.0.0.106:8660/captcha/select/bytes",
-                timeout=10,
-                files={"img_file": captcha_image_body},
-            )
-            captcha_req = CaptchaResponse(**captcha_req.json())
-            logger.debug(f"[Captcha] Get Resolve Result: {captcha_req}")
-            assert captcha_req.data
-            last_captcha_id = captcha_req.data.captcha_id
-        if captcha_req.data:
-            click_points: list[list[int]] = captcha_req.data.points
-            # 根据原图大小和截图大小计算缩放比例，然后计算出正确的需要点击的位置
-            for point in click_points:
-                real_click_points = {
-                    "x": point[0] * captcha_size["width"] / origin_image_size[0],
-                    "y": point[1] * captcha_size["height"] / origin_image_size[1],
-                }
-                await captcha_image.click(position=Position(**real_click_points))
-                await page.wait_for_timeout(1200)
-            captcha_image_body = ""
-            await page.click("text=确认")
-            geetest_up = await page.wait_for_selector(".geetest_up", state="visible")
-            Path("captcha.jpg").write_bytes(await page.screenshot())
-            if not geetest_up:
-                logger.warning("未检测到验证码验证结果，正在重试")
-                continue
-            geetest_result = await geetest_up.text_content()
-            assert geetest_result
-            logger.debug(f"[Captcha] Geetest result: {geetest_result}")
-            if "验证成功" in geetest_result:
-                logger.success("极验验证成功")
-            else:
-                logger.warning("极验验证失败，正在重试")
-
-    with contextlib.suppress(TimeoutError):
-        await page.goto(url, wait_until="networkidle", timeout=20000)
+    if captcha_address:
+        captcha_baseurl = f"{captcha_address.scheme}://{captcha_address.host}:{captcha_address.port}/captcha/select"
+        while captcha_image_body or captcha_result is False:
+            logger.warning("[Captcha] 需要人机验证，正在尝试自动解决验证码")
+            captcha_image = await page.query_selector(".geetest_item_img")
+            assert captcha_image
+            captcha_size = await captcha_image.bounding_box()
+            assert captcha_size
+            origin_image_size = 344, 384
+
+            async with httpx.AsyncClient() as client:
+                captcha_req = await client.post(
+                    f"{captcha_baseurl}/bytes",
+                    timeout=10,
+                    files={"img_file": captcha_image_body},
+                )
+                captcha_req = CaptchaResponse(**captcha_req.json())
+                logger.debug(f"[Captcha] Get Resolve Result: {captcha_req}")
+                assert captcha_req.data
+                last_captcha_id = captcha_req.data.captcha_id
+            if captcha_req.data:
+                click_points: list[list[int]] = captcha_req.data.points
+                logger.warning(f"[Captcha] 识别到 {len(click_points)} 个坐标，正在点击")
+                # 根据原图大小和截图大小计算缩放比例，然后计算出正确的需要点击的位置
+                for point in click_points:
+                    real_click_points = {
+                        "x": point[0] * captcha_size["width"] / origin_image_size[0],
+                        "y": point[1] * captcha_size["height"] / origin_image_size[1],
+                    }
+                    await captcha_image.click(position=Position(**real_click_points))
+                    await page.wait_for_timeout(800)
+                captcha_image_body = ""
+                await page.click("text=确认")
+                geetest_up = await page.wait_for_selector(".geetest_up", state="visible")
+                Path("captcha.jpg").write_bytes(await page.screenshot())
+                if not geetest_up:
+                    logger.warning("[Captcha] 未检测到验证码验证结果，正在重试")
+                    continue
+                geetest_result = await geetest_up.text_content()
+                assert geetest_result
+                logger.debug(f"[Captcha] Geetest result: {geetest_result}")
+                if "验证成功" in geetest_result:
+                    logger.success("[Captcha] 极验网页 Tip 验证成功")
+                else:
+                    logger.warning("[Captcha] 极验验证失败，正在重试")
+
+                with contextlib.suppress(TimeoutError):
+                    await page.wait_for_load_state(state="domcontentloaded", timeout=20000)
 
     if "bilibili.com/404" in page.url:
         logger.warning(f"[Bilibili推送] {dynid} 动态不存在")
         raise Notfound
 
+    await page.wait_for_load_state(state="domcontentloaded", timeout=20000)
+    await page.wait_for_selector(".opus-module-author", state="visible")
+
     await page.add_script_tag(path=mobile_style_js)
     await page.wait_for_function("getMobileStyle()")
 

diff --git a/aunly_bbot/utils/openai.py b/aunly_bbot/utils/openai.py
@@ -4,16 +4,18 @@
 import tiktoken_async
 
 from loguru import logger
+from httpx import Response
 from typing import Optional
 from collections import OrderedDict
-from httpx import HTTPError, Response
 
 from ..core.bot_config import BotConfig
 from ..model.openai import OpenAI, TokenUsage
 
-LIMIT_COUNT = {"gpt-3.5-turbo-0301": 3500, "gpt-4-0314": 7600, "gpt-4-32k-0314": 32200}.get(
-    BotConfig.Bilibili.openai_model or "gpt-3.5-turbo-0301", 3500
-)
+LIMIT_COUNT = {
+    "gpt-3.5-turbo-0613": 3500,
+    "gpt-3.5-turbo-16k-0613": 15000,
+    "gpt-4-0613": 7600,
+}.get(BotConfig.Bilibili.openai_model or "gpt-3.5-turbo-0613", 3500)
 
 if BotConfig.Bilibili.openai_summarization:
     logger.info("正在加载 OpenAI Token 计算模型")
@@ -59,10 +61,10 @@ def get_summarise_prompt(title: str, transcript: str) -> list[dict[str, str]]:
 def count_tokens(prompts: list[dict[str, str]]):
     """根据内容计算 token 数"""
 
-    if BotConfig.Bilibili.openai_model == "gpt-3.5-turbo-0301":
+    if BotConfig.Bilibili.openai_model.startswith("gpt-3.5-turbo"):
         tokens_per_message = 4
         tokens_per_name = -1
-    elif BotConfig.Bilibili.openai_model == "gpt-4":
+    elif BotConfig.Bilibili.openai_model.startswith("gpt-4"):
         tokens_per_message = 3
         tokens_per_name = 1
     else: