From 8985a8538b97ce5ea37d5f21d52ce891e2f157f7 Mon Sep 17 00:00:00 2001 From: mamei16 Date: Sun, 14 Apr 2024 15:55:58 +0200 Subject: [PATCH 1/4] Fix whisper STT (#5856) --- extensions/whisper_stt/script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index efa58ce97a..efc62f41e3 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -62,7 +62,7 @@ def ui(): whipser_model = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"]) whipser_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"]) - audio.change( + audio.stop_recording( auto_transcribe, [audio, auto_submit, whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then( None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}") From a0c69749e696e9f858faae68a50cdb81b89569fd Mon Sep 17 00:00:00 2001 From: Philipp Emanuel Weidmann Date: Thu, 18 Apr 2024 23:35:00 +0530 Subject: [PATCH 2/4] Revert sse-starlette version bump because it breaks API request cancellation (#5873) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9583ff6ad8..1db9642ad7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,7 +30,7 @@ wandb # API SpeechRecognition==3.10.0 flask_cloudflared==0.0.14 -sse-starlette==2.1.0 +sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) diff --git a/requirements_amd.txt b/requirements_amd.txt index 4c8ab32f4c..0da9380a28 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -28,7 +28,7 @@ wandb # API SpeechRecognition==3.10.0 flask_cloudflared==0.0.14 -sse-starlette==2.1.0 +sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index af77eb99c0..c296ea2aaa 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -28,7 +28,7 @@ wandb # API SpeechRecognition==3.10.0 flask_cloudflared==0.0.14 -sse-starlette==2.1.0 +sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index a3df36d839..2f09e39f18 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -28,7 +28,7 @@ wandb # API SpeechRecognition==3.10.0 flask_cloudflared==0.0.14 -sse-starlette==2.1.0 +sse-starlette==1.6.5 tiktoken # Mac wheels diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 36c63344a8..44c3976d72 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -28,7 +28,7 @@ wandb # API SpeechRecognition==3.10.0 flask_cloudflared==0.0.14 -sse-starlette==2.1.0 +sse-starlette==1.6.5 tiktoken # Mac wheels diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index f4b52b7cd4..4cdbd685c4 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -28,7 +28,7 @@ wandb # API SpeechRecognition==3.10.0 flask_cloudflared==0.0.14 -sse-starlette==2.1.0 +sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index ee70e14113..6903ee5b0a 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -28,7 +28,7 @@ wandb # API SpeechRecognition==3.10.0 flask_cloudflared==0.0.14 -sse-starlette==2.1.0 +sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 5643d98bf6..a134179fed 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -30,7 +30,7 @@ wandb # API SpeechRecognition==3.10.0 flask_cloudflared==0.0.14 -sse-starlette==2.1.0 +sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index dddcf0c0b8..030f77a6bd 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -28,5 +28,5 @@ wandb # API SpeechRecognition==3.10.0 flask_cloudflared==0.0.14 -sse-starlette==2.1.0 +sse-starlette==1.6.5 tiktoken From b30bce3b2f93fd601eb48cd882eadc7056421ff0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 18 Apr 2024 15:55:34 -0700 Subject: [PATCH 3/4] Bump transformers to 4.40 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1db9642ad7..c5ca17b5f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.39.* +transformers==4.40.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 0da9380a28..3bac470dc3 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.39.* +transformers==4.40.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index c296ea2aaa..656d1e35e3 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.39.* +transformers==4.40.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 2f09e39f18..fc46b82f36 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.39.* +transformers==4.40.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 44c3976d72..d694f28836 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.39.* +transformers==4.40.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 4cdbd685c4..76a6e9c390 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.39.* +transformers==4.40.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 6903ee5b0a..4f72bb2d9d 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.39.* +transformers==4.40.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index a134179fed..8f1671719e 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -23,7 +23,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.39.* +transformers==4.40.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 030f77a6bd..8da69bd2c0 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.39.* +transformers==4.40.* tqdm wandb From f27e1ba302971df80d2e06cfb3074d5004e769d3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 19 Apr 2024 00:24:46 -0300 Subject: [PATCH 4/4] Add a /v1/internal/chat-prompt endpoint (#5879) --- extensions/openai/completions.py | 21 ++++++++++++++------- extensions/openai/models.py | 3 ++- extensions/openai/script.py | 11 +++++++++++ extensions/openai/typing.py | 4 ++++ modules/models_settings.py | 7 ++++--- 5 files changed, 35 insertions(+), 11 deletions(-) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 5925101a32..44c1df86af 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -135,6 +135,7 @@ def convert_history(history): current_message = "" current_reply = "" user_input = "" + user_input_last = True system_message = "" # Multimodal: convert OpenAI format to multimodal extension format @@ -188,6 +189,7 @@ def convert_history(history): if role == "user": user_input = content + user_input_last = True if current_message: chat_dialogue.append([current_message, '']) current_message = "" @@ -195,6 +197,7 @@ def convert_history(history): current_message = content elif role == "assistant": current_reply = content + user_input_last = False if current_message: chat_dialogue.append([current_message, current_reply]) current_message = "" @@ -204,13 +207,13 @@ def convert_history(history): elif role == "system": system_message = content - # if current_message: - # chat_dialogue.append([current_message, '']) + if not user_input_last: + user_input = "" return user_input, system_message, {'internal': chat_dialogue, 'visible': copy.deepcopy(chat_dialogue)} -def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) -> dict: +def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, prompt_only=False) -> dict: if body.get('functions', []): raise InvalidRequestError(message="functions is not supported.", param='functions') @@ -310,14 +313,18 @@ def chat_streaming_chunk(content): # chunk[resp_list][0]["logprobs"] = None return chunk - if stream: - yield chat_streaming_chunk('') - # generate reply ####################################### - prompt = generate_chat_prompt(user_input, generate_params) + prompt = generate_chat_prompt(user_input, generate_params, _continue=continue_) + if prompt_only: + yield {'prompt': prompt} + return + token_count = len(encode(prompt)[0]) debug_msg({'prompt': prompt, 'generate_params': generate_params}) + if stream: + yield chat_streaming_chunk('') + generator = generate_chat_reply( user_input, generate_params, regenerate=False, _continue=continue_, loading_message=False) diff --git a/extensions/openai/models.py b/extensions/openai/models.py index 01045f90e5..a7e67df6f6 100644 --- a/extensions/openai/models.py +++ b/extensions/openai/models.py @@ -9,7 +9,8 @@ def get_current_model_info(): return { 'model_name': shared.model_name, - 'lora_names': shared.lora_names + 'lora_names': shared.lora_names, + 'loader': shared.args.loader } diff --git a/extensions/openai/script.py b/extensions/openai/script.py index e86473572e..03d99e8ded 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -3,6 +3,7 @@ import logging import os import traceback +from collections import deque from threading import Thread import speech_recognition as sr @@ -31,6 +32,7 @@ from .typing import ( ChatCompletionRequest, ChatCompletionResponse, + ChatPromptResponse, CompletionRequest, CompletionResponse, DecodeRequest, @@ -259,6 +261,15 @@ async def handle_logits(request_data: LogitsRequest): return JSONResponse(response) +@app.post('/v1/internal/chat-prompt', response_model=ChatPromptResponse, dependencies=check_key) +async def handle_chat_prompt(request: Request, request_data: ChatCompletionRequest): + path = request.url.path + is_legacy = "/generate" in path + generator = OAIcompletions.chat_completions_common(to_dict(request_data), is_legacy=is_legacy, prompt_only=True) + response = deque(generator, maxlen=1).pop() + return JSONResponse(response) + + @app.post("/v1/internal/stop-generation", dependencies=check_key) async def handle_stop_generation(request: Request): stop_everything_event() diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index c3ef04041f..2b30ebf2bd 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -124,6 +124,10 @@ class ChatCompletionResponse(BaseModel): usage: dict +class ChatPromptResponse(BaseModel): + prompt: str + + class EmbeddingsRequest(BaseModel): input: str | List[str] | List[int] | List[List[int]] model: str | None = Field(default=None, description="Unused parameter. To change the model, set the OPENEDAI_EMBEDDING_MODEL and OPENEDAI_EMBEDDING_DEVICE environment variables before starting the server.") diff --git a/modules/models_settings.py b/modules/models_settings.py index b7a7d3321b..5c29243146 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -136,9 +136,6 @@ def get_model_metadata(model): if 'instruction_template' not in model_settings: model_settings['instruction_template'] = 'Alpaca' - if model_settings['instruction_template'] != 'Custom (obtained from model metadata)': - model_settings['instruction_template_str'] = chat.load_instruction_template(model_settings['instruction_template']) - # Ignore rope_freq_base if set to the default value if 'rope_freq_base' in model_settings and model_settings['rope_freq_base'] == 10000: model_settings.pop('rope_freq_base') @@ -150,6 +147,10 @@ def get_model_metadata(model): for k in settings[pat]: model_settings[k] = settings[pat][k] + # Load instruction template if defined by name rather than by value + if model_settings['instruction_template'] != 'Custom (obtained from model metadata)': + model_settings['instruction_template_str'] = chat.load_instruction_template(model_settings['instruction_template']) + return model_settings