Merge pull request #62 from djkcyl/dev

v1.5.0-beta1
djkcyl · Jun 14, 2023 · 2c8ef4f · 2c8ef4f
2 parents e9b7265 + 6232721
commit 2c8ef4f
Show file tree

Hide file tree

Showing 13 changed files with 508 additions and 352 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -114,7 +114,7 @@ jobs:
 
       - name: Build Nuitka Binary
         run: |
-          pdm run pip install -U --force-reinstall "https://github.com/Nuitka/Nuitka/archive/factory.zip"
+          pdm add "https://github.com/Nuitka/Nuitka/archive/factory.zip"
           pdm run python -m nuitka --onefile --standalone --show-progress --assume-yes-for-downloads --output-dir=nuitka --windows-icon-from-ico=tv.ico --user-package-configuration-file=nuitka-${{ matrix.group }}.yml main.py
 
       - name: Get commit hash

diff --git a/aunly_bbot/cli/api.py b/aunly_bbot/cli/api.py
@@ -10,7 +10,7 @@
 from graiax.playwright.installer import install_playwright
 
 from ..utils.browser_shot import screenshot
-from ..utils.fonts_provider import get_font
+from ..utils.fonts_provider import font_init
 from ..utils.detect_package import is_package
 
 
@@ -27,7 +27,7 @@
 async def init_playwright():
     global PLAYWRIGIT
     logger.info("正在下载字体...")
-    await get_font()
+    font_init()
     logger.success("字体下载完成！")
 
     await install_playwright(browser_type="firefox")
@@ -39,10 +39,14 @@ async def init_playwright():
             "Mozilla/5.0 (Linux; Android 10; RMX1911) AppleWebKit/537.36 "
             "(KHTML, like Gecko) Chrome/100.0.4896.127 Mobile Safari/537.36"
         ),
+        # headless=False,
     )
     PLAYWRIGIT = ff
     logger.info("[Playwright] 正在获取浏览器版本")
-    page = await PLAYWRIGIT.new_page()
+    if len(PLAYWRIGIT.pages) > 0:
+        page = PLAYWRIGIT.pages[0]
+    else:
+        page = await PLAYWRIGIT.new_page()
     version = await page.evaluate("navigator.appVersion")
     logger.info(f"[BiliBili推送] 浏览器启动完成，当前版本 {version}")
     logger.debug(await PLAYWRIGIT.cookies())

diff --git a/aunly_bbot/cli/config.py b/aunly_bbot/cli/config.py
@@ -288,7 +288,7 @@ def openai_api_token(self):
     def openai_model(self):
         openai_model = ListPrompt(
             "请选择 OpenAI 模型",
-            [Choice("gpt-3.5-turbo-0301"), Choice("gpt-4-0314"), Choice("gpt-4-32k-0314")],
+            [Choice("gpt-3.5-turbo-0613"), Choice("gpt-3.5-turbo-16k-0613"), Choice("gpt-4-0613")],
             allow_filter=False,
             annotation="使用键盘的 ↑ 和 ↓ 来选择, 按回车确认",
         ).prompt()
@@ -306,13 +306,14 @@ def openai_proxy(self):
 
     def bilibili_username(self):
         username = InputPrompt("请输入 Bilibili 用户名: （可用于 AI 总结时获取 Bilibili 的 AI 字幕）").prompt()
-        if not username:
-            click.secho("用户名不能为空！", fg="bright_red", bold=True)
-            self.bilibili_username()
+        if not username or username == "":  # NOTE(review): `username == ""` is redundant; `not username` already covers the empty string
+            self.config["Bilibili"]["username"] = username  # stores the empty value; this path returns before any password prompt
+            return click.secho("用户名为空，已关闭对应功能！", fg="bright_red", bold=True)
         elif not username.isdigit():
             click.secho("用户名不合法！", fg="bright_red", bold=True)
             self.bilibili_username()
         self.config["Bilibili"]["username"] = username
+        self.bilibili_password()  # NOTE(review): also reached after the recursive retry above returns, with the rejected username just stored; confirm intended
 
     def bilibili_password(self):
         password = InputPrompt("请输入 Bilibili 密码: ", is_password=True).prompt()
@@ -450,12 +451,11 @@ def log_level(self):
             ListPrompt(
                 "请选择日志等级",
                 [
-                    Choice("DEBUG"),
                     Choice("INFO"),
+                    Choice("DEBUG"),
                     Choice("WARNING"),
                 ],
                 allow_filter=False,
-                default_select=1,
                 annotation="使用键盘的 ↑ 和 ↓ 来选择, 按回车确认",
             )
             .prompt()

diff --git a/aunly_bbot/core/log.py b/aunly_bbot/core/log.py
@@ -82,6 +82,7 @@ def in_screen():
 )
 
 logger.success(f"成功重载 logger，当前日志等级为 {log_level}")
+logger.info(f"日志文件将会保存在 {LOGPATH} 中")
 
 # logger.trace("TRACE 等级将会输出至控制台")
 # logger.debug("DEBUG 等级将会输出至控制台")

diff --git a/aunly_bbot/model/captcha.py b/aunly_bbot/model/captcha.py
@@ -0,0 +1,16 @@
+from typing import Optional
+from pydantic import BaseModel
+
+
+class CaptchaData(BaseModel):  # "data" payload of a captcha-solver reply (see CaptchaResponse.data)
+    captcha_id: str  # solver-side ID; browser_shot.py posts it back to "/report" when a solve is rejected
+    points: list[list[int]]  # click targets; browser_shot.py reads each as [x, y] and rescales from the 344x384 source image
+    rectangles: list[list[int]]  # presumably bounding boxes; not read anywhere in this diff, TODO confirm layout
+    yolo_data: list[list[int]]  # presumably raw detector output; not read anywhere in this diff, TODO confirm
+    time: int  # presumably solve duration; unit not shown in this diff, TODO confirm
+
+
+class CaptchaResponse(BaseModel):  # envelope built from the solver's JSON reply via CaptchaResponse(**resp.json())
+    code: int  # status code; its possible values are not defined in this diff
+    message: str  # presumably a human-readable status; TODO confirm
+    data: Optional[CaptchaData]  # may be None; the caller in browser_shot.py asserts it is set before use
diff --git a/aunly_bbot/model/config.py b/aunly_bbot/model/config.py
@@ -46,7 +46,7 @@ def can_use_login(cls, enable, values):
 
 
 class _Bilibili(BaseModel, extra=Extra.ignore):
-    username: Optional[int]
+    username: Optional[str]
     password: Optional[str]
     use_login: bool = False
     use_browser: bool = True
@@ -57,14 +57,15 @@ class _Bilibili(BaseModel, extra=Extra.ignore):
     dynamic_font_source: Literal["local", "remote"] = "local"
     openai_summarization: bool = False
     openai_api_token: Optional[str] = None
-    openai_model: str = "gpt-3.5-turbo"
+    openai_model: str = "gpt-3.5-turbo-0301"
     openai_proxy: Optional[AnyHttpUrl] = None
     openai_cooldown: int = 60
     openai_whitelist_users: Optional[list[int]] = None
     openai_promot_version: int = 2
     use_wordcloud: bool = False
     use_bcut_asr: bool = False
     asr_length_threshold: int = 60
+    captcha_address: Optional[AnyHttpUrl] = None
 
     # 验证是否可以登录
     @validator("use_login", always=True)

diff --git a/aunly_bbot/static/bot_config.exp.yaml b/aunly_bbot/static/bot_config.exp.yaml
@@ -30,6 +30,7 @@ Bilibili:
   use_wordcloud: true                     # 是否使用词云
   use_bcut_asr: true                      # 是否使用 BCut 接口进行 AI 语音识别
   asr_length_threshold: 60                # 调用语音识别的最小长度阈值（秒）
+  captcha_address: null                   # 验证码识别服务地址
 Event:
   mute: true                              # 是否向管理员发送被禁言的事件提醒。
   permchange: true                        # 是否向管理员发送权限变更的事件提醒。

diff --git a/aunly_bbot/utils/browser_shot.py b/aunly_bbot/utils/browser_shot.py
@@ -1,5 +1,6 @@
 import re
 import time
+import httpx
 import asyncio
 import contextlib
 
@@ -10,9 +11,17 @@
 from sentry_sdk import capture_exception
 from playwright._impl._api_types import TimeoutError
 from graiax.playwright.interface import PlaywrightContext
-from playwright.async_api._generated import Request, Page, BrowserContext, Route
+from playwright._impl._api_structures import Position
+from playwright.async_api._generated import (
+    Page,
+    Route,
+    Request,
+    Response,
+    BrowserContext,
+)
 
 from ..core.bot_config import BotConfig
+from ..model.captcha import CaptchaResponse
 
 from .fonts_provider import get_font
 
@@ -46,6 +55,10 @@ async def fill_font(route: Route, request: Request):
         await route.fallback()
 
 
+async def resolve_select_captcha(page: Page):  # placeholder coroutine: takes a page and does nothing yet
+    pass  # NOTE(review): nothing in this diff calls it; the captcha handling is written inline in get_mobile_screenshot
+
+
 async def browser_dynamic(dynid: str):
     app = Ariadne.current()
     browser_context = app.launch_manager.get_interface(PlaywrightContext).context
@@ -125,16 +138,111 @@ def network_requestfailed(request: Request):
 
 async def get_mobile_screenshot(page: Page, dynid: str):
     url = f"https://m.bilibili.com/dynamic/{dynid}"
+    captcha_image_body = ""
+    last_captcha_id = ""
+    captcha_result = None
+
+    async def captcha_image_url_callback(response: Response):  # keeps the body of a captcha-image response for the solver upload
+        nonlocal captcha_image_body  # rebinds the variable of the enclosing get_mobile_screenshot
+        logger.debug(f"[Captcha] Get captcha image url: {response.url}")
+        captcha_image_body = await response.body()  # NOTE(review): holds bytes from here on although it was initialised to ""
+
+    async def captcha_result_callback(response: Response):  # classifies a verification reply and records the outcome
+        nonlocal captcha_result, last_captcha_id  # both live in the enclosing get_mobile_screenshot
+        logger.debug(f"[Captcha] Get captcha result: {response.url}")
+        captcha_resp = await response.text()
+        logger.debug(f"[Captcha] Result: {captcha_resp}")
+        if '"result": "success"' in captcha_resp:  # NOTE(review): exact-substring test on the raw text; depends on the reply's spacing
+            logger.success("[Captcha] 验证码 Callback 验证成功")
+            captcha_result = True
+        elif '"result": "click"' in captcha_resp:
+            pass  # leaves captcha_result unchanged for this reply
+        else:
+            if last_captcha_id:  # report only when an ID from a previous solver answer is still stored
+                logger.warning(f"[Captcha] 验证码 Callback 验证失败，正在上报：{last_captcha_id}")
+                async with httpx.AsyncClient() as client:
+                    await client.post(  # captcha_baseurl is assigned further down in the enclosing function, before last_captcha_id is ever set
+                        f"{captcha_baseurl}/report", json={"captcha_id": last_captcha_id}
+                    )
+                last_captcha_id = ""  # cleared after reporting so the same ID is not reported again
+            captcha_result = False  # keeps the enclosing `while ... captcha_result is False` loop retrying
 
     await page.set_viewport_size({"width": 460, "height": 720})
 
+    captcha_address = BotConfig.Bilibili.captcha_address
+    if captcha_address:
+        page.on(
+            "response",
+            lambda response: captcha_image_url_callback(response)
+            if response.url.startswith("https://static.geetest.com/captcha_v3/")
+            else None,
+        )
+        page.on(
+            "response",
+            lambda response: captcha_result_callback(response)
+            if response.url.startswith("https://api.geetest.com/ajax.php")
+            else None,
+        )
+
     with contextlib.suppress(TimeoutError):
         await page.goto(url, wait_until="networkidle", timeout=20000)
 
+    if captcha_address:
+        captcha_baseurl = f"{captcha_address.scheme}://{captcha_address.host}:{captcha_address.port}/captcha/select"
+        while captcha_image_body or captcha_result is False:
+            logger.warning("[Captcha] 需要人机验证，正在尝试自动解决验证码")
+            captcha_image = await page.query_selector(".geetest_item_img")
+            assert captcha_image
+            captcha_size = await captcha_image.bounding_box()
+            assert captcha_size
+            origin_image_size = 344, 384
+
+            async with httpx.AsyncClient() as client:
+                captcha_req = await client.post(
+                    f"{captcha_baseurl}/bytes",
+                    timeout=10,
+                    files={"img_file": captcha_image_body},
+                )
+                captcha_req = CaptchaResponse(**captcha_req.json())
+                logger.debug(f"[Captcha] Get Resolve Result: {captcha_req}")
+                assert captcha_req.data
+                last_captcha_id = captcha_req.data.captcha_id
+            if captcha_req.data:
+                click_points: list[list[int]] = captcha_req.data.points
+                logger.warning(f"[Captcha] 识别到 {len(click_points)} 个坐标，正在点击")
+                # 根据原图大小和截图大小计算缩放比例，然后计算出正确的需要点击的位置
+                for point in click_points:
+                    real_click_points = {
+                        "x": point[0] * captcha_size["width"] / origin_image_size[0],
+                        "y": point[1] * captcha_size["height"] / origin_image_size[1],
+                    }
+                    await captcha_image.click(position=Position(**real_click_points))
+                    await page.wait_for_timeout(800)
+                captcha_image_body = ""
+                await page.click("text=确认")
+                geetest_up = await page.wait_for_selector(".geetest_up", state="visible")
+                Path("captcha.jpg").write_bytes(await page.screenshot())
+                if not geetest_up:
+                    logger.warning("[Captcha] 未检测到验证码验证结果，正在重试")
+                    continue
+                geetest_result = await geetest_up.text_content()
+                assert geetest_result
+                logger.debug(f"[Captcha] Geetest result: {geetest_result}")
+                if "验证成功" in geetest_result:
+                    logger.success("[Captcha] 极验网页 Tip 验证成功")
+                else:
+                    logger.warning("[Captcha] 极验验证失败，正在重试")
+
+                with contextlib.suppress(TimeoutError):
+                    await page.wait_for_load_state(state="domcontentloaded", timeout=20000)
+
     if "bilibili.com/404" in page.url:
         logger.warning(f"[Bilibili推送] {dynid} 动态不存在")
         raise Notfound
 
+    await page.wait_for_load_state(state="domcontentloaded", timeout=20000)
+    await page.wait_for_selector(".opus-module-author", state="visible")
+
     await page.add_script_tag(path=mobile_style_js)
     await page.wait_for_function("getMobileStyle()")
 

diff --git a/aunly_bbot/utils/openai.py b/aunly_bbot/utils/openai.py
@@ -4,16 +4,18 @@
 import tiktoken_async
 
 from loguru import logger
+from httpx import Response
 from typing import Optional
 from collections import OrderedDict
-from httpx import HTTPError, Response
 
 from ..core.bot_config import BotConfig
 from ..model.openai import OpenAI, TokenUsage
 
-LIMIT_COUNT = {"gpt-3.5-turbo-0301": 3500, "gpt-4-0314": 7600, "gpt-4-32k-0314": 32200}.get(
-    BotConfig.Bilibili.openai_model or "gpt-3.5-turbo-0301", 3500
-)
+LIMIT_COUNT = {
+    "gpt-3.5-turbo-0613": 3500,
+    "gpt-3.5-turbo-16k-0613": 15000,
+    "gpt-4-0613": 7600,
+}.get(BotConfig.Bilibili.openai_model or "gpt-3.5-turbo-0613", 3500)
 
 if BotConfig.Bilibili.openai_summarization:
     logger.info("正在加载 OpenAI Token 计算模型")
@@ -59,10 +61,10 @@ def get_summarise_prompt(title: str, transcript: str) -> list[dict[str, str]]:
 def count_tokens(prompts: list[dict[str, str]]):
     """根据内容计算 token 数"""
 
-    if BotConfig.Bilibili.openai_model == "gpt-3.5-turbo-0301":
+    if BotConfig.Bilibili.openai_model.startswith("gpt-3.5-turbo"):
         tokens_per_message = 4
         tokens_per_name = -1
-    elif BotConfig.Bilibili.openai_model == "gpt-4":
+    elif BotConfig.Bilibili.openai_model.startswith("gpt-4"):
         tokens_per_message = 3
         tokens_per_name = 1
     else: