Skip to content

Commit

Permalink
feat: add /v1/completions parser && add /v1/chat/completions function…
Browse files Browse the repository at this point in the history
… call record (#80)

- add /v1/completions parser
- add /v1/chat/completions function call record
- log directory has changed
- add logo
  • Loading branch information
KenyonY authored Oct 15, 2023
1 parent 3bbf3fa commit e7da8de
Show file tree
Hide file tree
Showing 24 changed files with 402 additions and 173 deletions.
24 changes: 18 additions & 6 deletions .env
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
# 示例与解释见 .env.example

# `LOG_CHAT`: 是否记录日志
LOG_CHAT=false
#LOG_CHAT=false

#BENCHMARK_MODE=true

#PRINT_CHAT=true

PRINT_CHAT=false
# `OPENAI_BASE_URL`: 转发openai风格的任何服务地址,允许指定多个, 以逗号隔开。
# 如果指定超过一个,则任何OPENAI_ROUTE_PREFIX/EXTRA_ROUTE_PREFIX都不能为根路由/
OPENAI_BASE_URL=https://api.openai.com
OPENAI_BASE_URL=https://api.openai-forward.com
#OPENAI_BASE_URL=https://api.openai.com

# `OPENAI_ROUTE_PREFIX`: 可指定所有openai风格(为记录日志)服务的转发路由前缀
OPENAI_ROUTE_PREFIX=
Expand All @@ -15,6 +19,7 @@ OPENAI_API_KEY=
FORWARD_KEY=

CHAT_COMPLETION_ROUTE=/v1/chat/completions
COMPLETION_ROUTE=/v1/completions

# `EXTRA_BASE_URL`: 可指定任意服务转发
EXTRA_BASE_URL=
Expand All @@ -24,10 +29,14 @@ EXTRA_ROUTE_PREFIX=
# `REQ_RATE_LIMIT`: i.e. 对指定路由的请求速率限制, 区分用户
# format: {route: ratelimit-string}
# ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation
REQ_RATE_LIMIT={"/v1/chat/completions":"60/minute;600/hour", "/v1/completions":"60/minute;600/hour"}
#REQ_RATE_LIMIT={"/v1/chat/completions":"60/minute;600/hour", "/v1/completions":"60/minute;600/hour"}
REQ_RATE_LIMIT={"/benchmark/v1/chat/completions":"10/10second;100/2minutes"}

# rate limit后端: [memory, redis, memcached, ...] :ref: https://limits.readthedocs.io/en/stable/storage.html#
#REQ_RATE_LIMIT_BACKEND=redis://localhost:6379

# `GLOBAL_RATE_LIMIT`: 限制所有`REQ_RATE_LIMIT`没有指定的路由. 不填默认无限制
GLOBAL_RATE_LIMIT=
GLOBAL_RATE_LIMIT=100/minute

#`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) :ref: https://limits.readthedocs.io/en/latest/strategies.html
# `fixed-window`: most memory efficient strategy; `moving-window`:most effective for preventing bursts but higher memory cost.
Expand All @@ -38,7 +47,10 @@ TOKEN_RATE_LIMIT={"/v1/chat/completions":"50/second","/v1/completions":"60/secon


# TCP连接的超时时间(秒)
TIMEOUT=10
TIMEOUT=6

ITER_CHUNK_TYPE=one-by-one
#ITER_CHUNK_TYPE=efficiency

IP_BLACKLIST=

Expand Down
8 changes: 7 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ OPENAI_BASE_URL='https://api.openai.com, http://localhost:8080'
OPENAI_ROUTE_PREFIX='/openai, /localai'

CHAT_COMPLETION_ROUTE=/openai/v1/chat/completions
COMPLETION_ROUTE=/v1/completions

# OPENAI_API_KEY:允许输入多个api key, 以逗号隔开, 形成轮询池
OPENAI_API_KEY='sk-xxx1, sk-xxx2, sk-xxx3'
Expand All @@ -34,6 +35,9 @@ REQ_RATE_LIMIT='{
"/localai/v1/chat/completions": "2/second"
}'

# rate limit后端: [memory, redis, memcached, ...] :ref: https://limits.readthedocs.io/en/stable/storage.html#
REQ_RATE_LIMIT_BACKEND="redis://localhost:6379"

# `GLOBAL_RATE_LIMIT`: 限制所有`REQ_RATE_LIMIT`没有指定的路由. 不填默认无限制
GLOBAL_RATE_LIMIT=inf

Expand All @@ -48,7 +52,9 @@ PROXY=http://localhost:7890
TOKEN_RATE_LIMIT={"/v1/chat/completions":"20/second", "/benchmark/v1/chat/completions":"500/second"}


TIMEOUT=100
TIMEOUT=10

ITER_CHUNK_TYPE=efficiency

# 设定时区
TZ=Asia/Shanghai
Binary file added .github/data/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
74 changes: 61 additions & 13 deletions Examples/chat_completion.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import openai
from rich import print
from sparrow import yaml_load # pip install sparrow-python
from sparrow import MeasureTime, yaml_load # pip install sparrow-python

config = yaml_load("config.yaml", rel_path=True)
print(f"{config=}")
Expand All @@ -9,35 +9,83 @@

stream = True
# stream = False

# debug = True
debug = False

# is_function_call = True
is_function_call = False

user_content = """
用c实现目前已知最快平方根算法
"""
from sparrow import MeasureTime

mt = MeasureTime().start()
resp = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
# model="gpt-4",
messages=[
{"role": "user", "content": user_content},
],
stream=stream,
request_timeout=30,
)


# function_call
if is_function_call:
functions = [
{
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
}
]
resp = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role": "user", "content": "What's the weather like in Boston today?"}
],
functions=functions,
function_call="auto", # auto is default, but we'll be explicit
stream=stream,
request_timeout=30,
)

else:
resp = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
# model="gpt-4",
messages=[
{"role": "user", "content": user_content},
],
stream=stream,
request_timeout=30,
)

if stream:
if debug:
for chunk in resp:
print(chunk)
else:
chunk_message = next(resp)['choices'][0]['delta']
print(f"{chunk_message['role']}: ")
if is_function_call:
function_call = chunk_message.get("function_call", "")
name = function_call["name"]
print(f"{chunk_message['role']}: \n{name}: ")
else:
print(f"{chunk_message['role']}: ")
for chunk in resp:
chunk_message = chunk['choices'][0]['delta']
content = chunk_message.get("content", "")
content = ""
if is_function_call:
function_call = chunk_message.get("function_call", "")
if function_call:
content = function_call.get("arguments", "")

else:
content = chunk_message.get("content", "")
print(content, end="")
print()
else:
Expand Down
24 changes: 18 additions & 6 deletions Examples/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@


stream = True
# debug=True
debug = False


user_content = "现在让我们使用泰勒展开推导出牛顿法迭代公式: \n"
Expand All @@ -27,11 +29,21 @@
console = Console()
sentences = ""
if stream:
for chunk in resp:
text = chunk['choices'][0]['text']
console.print(text, end="")
sentences += text
if debug:
for chunk in resp:
print(chunk)
else:
for chunk in resp:
text = chunk['choices'][0]['text']
console.print(text, end="")
sentences += text
print()

# print(70*"-")
else:
if debug:
print(resp)
else:
sentences = resp['choices'][0]['text']
print(sentences)

print(70 * "-")
# console.print(Markdown(sentences))
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
OpenAI Forward
<br>
</h1>
<p align="center">
一个支持多目标路由、流量控制、自动重试以及一键云端部署的高效代理工具
</p>


<div align=center><img src=.github/data/logo.png width="300px"></div>



<p align="center">
<a href="https://pypi.org/project/openai-forward/">
Expand Down Expand Up @@ -44,10 +46,9 @@
</div>

openai-forward
是一个专为大型语言模型设计的高级转发服务,提供包括用户请求速率控制、Token速率限制和自定义API密钥等增强功能
是一个专为大型语言模型设计的高级转发服务,提供包括用户请求速率控制、Token速率限制、日志记录和自定义API密钥等功能
该服务可用于代理本地模型(如 [LocalAI](https://github.com/go-skynet/LocalAI))或云端模型(如 [OpenAI](https://api.openai.com))。
服务由 `fastapi`,`aiohttp`,`asyncio`全异步实现,保证了其高效性。

服务由 `fastapi`,`aiohttp`,`asyncio`完全异步实现。


<a>
Expand All @@ -60,8 +61,8 @@ OpenAI-Forward 提供如下功能:

- **全能代理**: 具备转发几乎所有类型请求的能力
- **用户流量控制**: 实现用户请求速率限制(RPM)和流式Token速率限制(TPM)
- **实时响应日志**: 支持流式响应的会话日志记录,用于调试自己的prompt合理性
- **自定义密钥**: 允许用户用自定义生成的密钥替代原始API密钥
- **实时响应日志**: 支持流式响应的会话日志记录
- **多目标路由**: 能够同时转发多个服务到不同的路由地址
- **自动重试机制**:在请求失败时自动重试
- **快速部署**: `pip`/`docker` 快速本地安装和部署,支持一键云端部署
Expand Down
2 changes: 1 addition & 1 deletion openai_forward/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.6.0"
__version__ = "0.6.1"

from dotenv import load_dotenv

Expand Down
10 changes: 5 additions & 5 deletions openai_forward/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,17 @@ def convert(log_folder: str = None, target_path: str = None):
Returns:
None
"""
from openai_forward.helper import convert_folder_to_jsonl
from openai_forward.helper import convert_folder_to_jsonl, route_prefix_to_str
from openai_forward.settings import OPENAI_ROUTE_PREFIX

print(60 * '-')
if log_folder is None:
if target_path is not None:
raise ValueError("target_path must be None when log_folder is None")
_prefix_list = [i.replace("/", "_") for i in OPENAI_ROUTE_PREFIX]
for _prefix in _prefix_list:
log_folder = f"./Log/chat/{_prefix}"
target_path = f"./Log/chat{_prefix}.json"
_prefix_list = [route_prefix_to_str(i) for i in OPENAI_ROUTE_PREFIX]
for prefix in _prefix_list:
log_folder = f"./Log/{prefix}/chat"
target_path = f"./Log/chat_{prefix}.json"
print(f"Convert {log_folder}/*.log to {target_path}")
convert_folder_to_jsonl(log_folder, target_path)
print(60 * '-')
Expand Down
18 changes: 10 additions & 8 deletions openai_forward/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,25 @@
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded

from . import custom_slowapi
from .forward import create_generic_proxies, create_openai_proxies
from . import __version__, custom_slowapi
from .forward.extra import generic_objs
from .forward.openai import openai_objs
from .helper import normalize_route as normalize_route_path
from .settings import (
BENCHMARK_MODE,
RATE_LIMIT_BACKEND,
RATE_LIMIT_STRATEGY,
dynamic_request_rate_limit,
get_limiter_key,
show_startup,
)

limiter = Limiter(key_func=get_limiter_key, strategy=RATE_LIMIT_STRATEGY)

app = FastAPI(title="openai_forward", version="0.5")
limiter = Limiter(
key_func=get_limiter_key,
strategy=RATE_LIMIT_STRATEGY,
storage_uri=RATE_LIMIT_BACKEND,
)
app = FastAPI(title="openai-forward", version=__version__)

app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
Expand Down Expand Up @@ -57,9 +62,6 @@ def healthz(request: Request):
methods=["POST"],
)

openai_objs = create_openai_proxies()
generic_objs = create_generic_proxies()


@app.on_event("shutdown")
async def shutdown():
Expand Down
Loading

0 comments on commit e7da8de

Please sign in to comment.