Skip to content

Commit

Permalink
feat: add /v1/completions parser && add /v1/chat/completions function…
Browse files Browse the repository at this point in the history
… call record (#80)

- add /v1/completions parser
- add /v1/chat/completions function call record
- log directory has changed
- add logo
  • Loading branch information
KenyonY authored Oct 15, 2023
1 parent 3bbf3fa commit e7da8de
Show file tree
Hide file tree
Showing 24 changed files with 402 additions and 173 deletions.
24 changes: 18 additions & 6 deletions .env
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
# 示例与解释见 .env.example

# `LOG_CHAT`: 是否记录日志
LOG_CHAT=false
#LOG_CHAT=false

#BENCHMARK_MODE=true

#PRINT_CHAT=true

PRINT_CHAT=false
# `OPENAI_BASE_URL`: 转发openai风格的任何服务地址,允许指定多个, 以逗号隔开。
# 如果指定超过一个,则任何OPENAI_ROUTE_PREFIX/EXTRA_ROUTE_PREFIX都不能为根路由/
OPENAI_BASE_URL=https://api.openai.com
OPENAI_BASE_URL=https://api.openai-forward.com
#OPENAI_BASE_URL=https://api.openai.com

# `OPENAI_ROUTE_PREFIX`: 可指定所有openai风格(为记录日志)服务的转发路由前缀
OPENAI_ROUTE_PREFIX=
Expand All @@ -15,6 +19,7 @@ OPENAI_API_KEY=
FORWARD_KEY=

CHAT_COMPLETION_ROUTE=/v1/chat/completions
COMPLETION_ROUTE=/v1/completions

# `EXTRA_BASE_URL`: 可指定任意服务转发
EXTRA_BASE_URL=
Expand All @@ -24,10 +29,14 @@ EXTRA_ROUTE_PREFIX=
# `REQ_RATE_LIMIT`: i.e. 对指定路由的请求速率限制, 区分用户
# format: {route: ratelimit-string}
# ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation
REQ_RATE_LIMIT={"/v1/chat/completions":"60/minute;600/hour", "/v1/completions":"60/minute;600/hour"}
#REQ_RATE_LIMIT={"/v1/chat/completions":"60/minute;600/hour", "/v1/completions":"60/minute;600/hour"}
REQ_RATE_LIMIT={"/benchmark/v1/chat/completions":"10/10second;100/2minutes"}

# rate limit后端: [memory, redis, memcached, ...] :ref: https://limits.readthedocs.io/en/stable/storage.html#
#REQ_RATE_LIMIT_BACKEND=redis://localhost:6379

# `GLOBAL_RATE_LIMIT`: 限制所有`REQ_RATE_LIMIT`没有指定的路由. 不填默认无限制
GLOBAL_RATE_LIMIT=
GLOBAL_RATE_LIMIT=100/minute

#`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) :ref: https://limits.readthedocs.io/en/latest/strategies.html
# `fixed-window`: most memory efficient strategy; `moving-window`:most effective for preventing bursts but higher memory cost.
Expand All @@ -38,7 +47,10 @@ TOKEN_RATE_LIMIT={"/v1/chat/completions":"50/second","/v1/completions":"60/secon


# TCP连接的超时时间(秒)
TIMEOUT=10
TIMEOUT=6

ITER_CHUNK_TYPE=one-by-one
#ITER_CHUNK_TYPE=efficiency

IP_BLACKLIST=

Expand Down
8 changes: 7 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ OPENAI_BASE_URL='https://api.openai.com, http://localhost:8080'
OPENAI_ROUTE_PREFIX='/openai, /localai'

CHAT_COMPLETION_ROUTE=/openai/v1/chat/completions
COMPLETION_ROUTE=/v1/completions

# OPENAI_API_KEY:允许输入多个api key, 以逗号隔开, 形成轮询池
OPENAI_API_KEY='sk-xxx1, sk-xxx2, sk-xxx3'
Expand All @@ -34,6 +35,9 @@ REQ_RATE_LIMIT='{
"/localai/v1/chat/completions": "2/second"
}'

# rate limit后端: [memory, redis, memcached, ...] :ref: https://limits.readthedocs.io/en/stable/storage.html#
REQ_RATE_LIMIT_BACKEND="redis://localhost:6379"

# `GLOBAL_RATE_LIMIT`: 限制所有`REQ_RATE_LIMIT`没有指定的路由. 不填默认无限制
GLOBAL_RATE_LIMIT=inf

Expand All @@ -48,7 +52,9 @@ PROXY=http://localhost:7890
TOKEN_RATE_LIMIT={"/v1/chat/completions":"20/second", "/benchmark/v1/chat/completions":"500/second"}


TIMEOUT=100
TIMEOUT=10

ITER_CHUNK_TYPE=efficiency

# 设定时区
TZ=Asia/Shanghai
Binary file added .github/data/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
74 changes: 61 additions & 13 deletions Examples/chat_completion.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import openai
from rich import print
from sparrow import yaml_load # pip install sparrow-python
from sparrow import MeasureTime, yaml_load # pip install sparrow-python

config = yaml_load("config.yaml", rel_path=True)
print(f"{config=}")
Expand All @@ -9,35 +9,83 @@

stream = True
# stream = False

# debug = True
debug = False

# is_function_call = True
is_function_call = False

user_content = """
用c实现目前已知最快平方根算法
"""
from sparrow import MeasureTime

mt = MeasureTime().start()
resp = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
# model="gpt-4",
messages=[
{"role": "user", "content": user_content},
],
stream=stream,
request_timeout=30,
)


# function_call
if is_function_call:
functions = [
{
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
}
]
resp = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role": "user", "content": "What's the weather like in Boston today?"}
],
functions=functions,
function_call="auto", # auto is default, but we'll be explicit
stream=stream,
request_timeout=30,
)

else:
resp = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
# model="gpt-4",
messages=[
{"role": "user", "content": user_content},
],
stream=stream,
request_timeout=30,
)

if stream:
if debug:
for chunk in resp:
print(chunk)
else:
chunk_message = next(resp)['choices'][0]['delta']
print(f"{chunk_message['role']}: ")
if is_function_call:
function_call = chunk_message.get("function_call", "")
name = function_call["name"]
print(f"{chunk_message['role']}: \n{name}: ")
else:
print(f"{chunk_message['role']}: ")
for chunk in resp:
chunk_message = chunk['choices'][0]['delta']
content = chunk_message.get("content", "")
content = ""
if is_function_call:
function_call = chunk_message.get("function_call", "")
if function_call:
content = function_call.get("arguments", "")

else:
content = chunk_message.get("content", "")
print(content, end="")
print()
else:
Expand Down
24 changes: 18 additions & 6 deletions Examples/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@


stream = True
# debug=True
debug = False


user_content = "现在让我们使用泰勒展开推导出牛顿法迭代公式: \n"
Expand All @@ -27,11 +29,21 @@
console = Console()
sentences = ""
if stream:
for chunk in resp:
text = chunk['choices'][0]['text']
console.print(text, end="")
sentences += text
if debug:
for chunk in resp:
print(chunk)
else:
for chunk in resp:
text = chunk['choices'][0]['text']
console.print(text, end="")
sentences += text
print()

# print(70*"-")
else:
if debug:
print(resp)
else:
sentences = resp['choices'][0]['text']
print(sentences)

print(70 * "-")
# console.print(Markdown(sentences))
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
OpenAI Forward
<br>
</h1>
<p align="center">
一个支持多目标路由、流量控制、自动重试以及一键云端部署的高效代理工具
</p>


<div align=center><img src=.github/data/logo.png width="300px"></div>



<p align="center">
<a href="https://pypi.org/project/openai-forward/">
Expand Down Expand Up @@ -44,10 +46,9 @@
</div>

openai-forward
是一个专为大型语言模型设计的高级转发服务,提供包括用户请求速率控制、Token速率限制和自定义API密钥等增强功能
是一个专为大型语言模型设计的高级转发服务,提供包括用户请求速率控制、Token速率限制、日志记录和自定义API密钥等功能
该服务可用于代理本地模型(如 [LocalAI](https://github.com/go-skynet/LocalAI))或云端模型(如 [OpenAI](https://api.openai.com))。
服务由 `fastapi`,`aiohttp`,`asyncio`全异步实现,保证了其高效性。

服务由 `fastapi`,`aiohttp`,`asyncio`完全异步实现。


<a>
Expand All @@ -60,8 +61,8 @@ OpenAI-Forward 提供如下功能:

- **全能代理**: 具备转发几乎所有类型请求的能力
- **用户流量控制**: 实现用户请求速率限制(RPM)和流式Token速率限制(TPM)
- **实时响应日志**: 支持流式响应的会话日志记录,用于调试自己的prompt合理性
- **自定义密钥**: 允许用户用自定义生成的密钥替代原始API密钥
- **实时响应日志**: 支持流式响应的会话日志记录
- **多目标路由**: 能够同时转发多个服务到不同的路由地址
- **自动重试机制**:在请求失败时自动重试
- **快速部署**: `pip`/`docker` 快速本地安装和部署,支持一键云端部署
Expand Down
2 changes: 1 addition & 1 deletion openai_forward/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.6.0"
__version__ = "0.6.1"

from dotenv import load_dotenv

Expand Down
10 changes: 5 additions & 5 deletions openai_forward/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,17 @@ def convert(log_folder: str = None, target_path: str = None):
Returns:
None
"""
from openai_forward.helper import convert_folder_to_jsonl
from openai_forward.helper import convert_folder_to_jsonl, route_prefix_to_str
from openai_forward.settings import OPENAI_ROUTE_PREFIX

print(60 * '-')
if log_folder is None:
if target_path is not None:
raise ValueError("target_path must be None when log_folder is None")
_prefix_list = [i.replace("/", "_") for i in OPENAI_ROUTE_PREFIX]
for _prefix in _prefix_list:
log_folder = f"./Log/chat/{_prefix}"
target_path = f"./Log/chat{_prefix}.json"
_prefix_list = [route_prefix_to_str(i) for i in OPENAI_ROUTE_PREFIX]
for prefix in _prefix_list:
log_folder = f"./Log/{prefix}/chat"
target_path = f"./Log/chat_{prefix}.json"
print(f"Convert {log_folder}/*.log to {target_path}")
convert_folder_to_jsonl(log_folder, target_path)
print(60 * '-')
Expand Down
18 changes: 10 additions & 8 deletions openai_forward/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,25 @@
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded

from . import custom_slowapi
from .forward import create_generic_proxies, create_openai_proxies
from . import __version__, custom_slowapi
from .forward.extra import generic_objs
from .forward.openai import openai_objs
from .helper import normalize_route as normalize_route_path
from .settings import (
BENCHMARK_MODE,
RATE_LIMIT_BACKEND,
RATE_LIMIT_STRATEGY,
dynamic_request_rate_limit,
get_limiter_key,
show_startup,
)

limiter = Limiter(key_func=get_limiter_key, strategy=RATE_LIMIT_STRATEGY)

app = FastAPI(title="openai_forward", version="0.5")
limiter = Limiter(
key_func=get_limiter_key,
strategy=RATE_LIMIT_STRATEGY,
storage_uri=RATE_LIMIT_BACKEND,
)
app = FastAPI(title="openai-forward", version=__version__)

app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
Expand Down Expand Up @@ -57,9 +62,6 @@ def healthz(request: Request):
methods=["POST"],
)

openai_objs = create_openai_proxies()
generic_objs = create_generic_proxies()


@app.on_event("shutdown")
async def shutdown():
Expand Down
Loading

0 comments on commit e7da8de

Please sign in to comment.