diff --git a/README.md b/README.md index fb22d09945..0e16aa30a5 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,9 @@ A Gradio web UI for Large Language Models. Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation. -|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_instruct.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_chat.png) | +|![Image1](https://github.com/oobabooga/screenshots/raw/main/AFTER-INSTRUCT.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/AFTER-CHAT.png) | |:---:|:---:| -|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_default.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_parameters.png) | +|![Image1](https://github.com/oobabooga/screenshots/raw/main/AFTER-DEFAULT.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/AFTER-PARAMETERS.png) | ## Features @@ -202,18 +202,19 @@ List of command-line flags ```txt usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS] - [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--chat-buttons] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices] - [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] - [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] - [--flash-attn] [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] - [--n-gpu-layers N_GPU_LAYERS] [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] - [--attention-sink-size ATTENTION_SINK_SIZE] [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] - [--no_xformers] [--no_sdpa] [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] - [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] + [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] + [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] + [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--tensorcores] + [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS] + [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] [--attention-sink-size ATTENTION_SINK_SIZE] + [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] + [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--enable_tp] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] [--disable_exllamav2] + [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] - [--subpath SUBPATH] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] + [--subpath SUBPATH] [--old-colors] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch] [--no_inject_fused_attention] + [--cache_4bit] [--cache_8bit] [--chat-buttons] Text generation web UI @@ -232,7 +233,6 @@ Basic settings: file will be loaded by default without the need to use the --settings flag. --extensions EXTENSIONS [EXTENSIONS ...] The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. --verbose Print the prompts to the terminal. - --chat-buttons Show buttons on the chat tab instead of a hover menu. --idle-timeout IDLE_TIMEOUT Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again. Model loader: @@ -291,9 +291,8 @@ ExLlamaV2: --no_flash_attn Force flash-attention to not be used. --no_xformers Force xformers to not be used. --no_sdpa Force Torch SDPA to not be used. - --cache_8bit Use 8-bit cache to save VRAM. - --cache_4bit Use Q4 cache to save VRAM. --num_experts_per_token NUM_EXPERTS_PER_TOKEN Number of experts to use for generation. Applies to MoE models like Mixtral. + --enable_tp Enable Tensor Parallelism (TP) in ExLlamaV2. AutoGPTQ: --triton Use triton. @@ -311,6 +310,9 @@ HQQ: TensorRT-LLM: --cpp-runner Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet. +Cache: + --cache_type CACHE_TYPE KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4. + DeepSpeed: --deepspeed Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration. --nvme-offload-dir NVME_OFFLOAD_DIR DeepSpeed: Directory to use for ZeRO-3 NVME offloading. @@ -332,6 +334,7 @@ Gradio: --ssl-keyfile SSL_KEYFILE The path to the SSL certificate key file. --ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file. --subpath SUBPATH Customize the subpath for gradio, use with reverse proxy + --old-colors Use the legacy Gradio colors, before the December/2024 update. API: --api Enable the API extension. diff --git a/css/Inter/Inter-Italic-VariableFont_opsz,wght.ttf b/css/Inter/Inter-Italic-VariableFont_opsz,wght.ttf new file mode 100644 index 0000000000..43ed4f5ee6 Binary files /dev/null and b/css/Inter/Inter-Italic-VariableFont_opsz,wght.ttf differ diff --git a/css/Inter/Inter-VariableFont_opsz,wght.ttf b/css/Inter/Inter-VariableFont_opsz,wght.ttf new file mode 100644 index 0000000000..e31b51e3e9 Binary files /dev/null and b/css/Inter/Inter-VariableFont_opsz,wght.ttf differ diff --git a/css/chat_style-cai-chat-square.css b/css/chat_style-cai-chat-square.css index d626dbb1c8..854fff607c 100644 --- a/css/chat_style-cai-chat-square.css +++ b/css/chat_style-cai-chat-square.css @@ -16,6 +16,6 @@ } .message { - padding-bottom: 30px; + padding-bottom: 2em; grid-template-columns: 70px minmax(0, 1fr); } diff --git a/css/chat_style-cai-chat.css b/css/chat_style-cai-chat.css index 618184cfab..d7b1ba88e4 100644 --- a/css/chat_style-cai-chat.css +++ b/css/chat_style-cai-chat.css @@ -1,7 +1,7 @@ .message { display: grid; grid-template-columns: 60px minmax(0, 1fr); - padding-bottom: 15px; + padding-bottom: 2em; font-size: 15px; font-family: 'Noto Sans', Helvetica, Arial, sans-serif; line-height: 22.5px !important; diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css index 50b9402f4d..f6ceb93245 100644 --- a/css/html_instruct_style.css +++ b/css/html_instruct_style.css @@ -1,74 +1,101 @@ .chat { background: transparent; - padding: 24px 19px; - padding-right: 19px !important; + padding: 0; padding-top: 0; } -.chat > .messages { - padding-top: 18px !important; +.chat > .messages:first-child { + padding-top: 0 !important; } -.message { - display: grid; - grid-template-columns: 60px 1fr; - padding-bottom: 25px; - font-size: 15px; - font-family: 'Noto Sans', Helvetica, Arial, sans-serif; - line-height: 24px; +.chat > .messages > :last-child { + margin-bottom: 1.7rem !important; } -.message:first-child { - padding-top: 0; +.chat .message-body p, .chat .message-body li { + font-size: 1rem !important; + line-height: 28px !important; } -.username { - display: none; +.dark .chat .message-body p, +.dark .chat .message-body li, +.dark .chat .message-body q { + color: #d1d5db !important; } -.message-body p, .message-body li { - font-size: 15px !important; - line-height: 24px !important; +.chat .message-body p, +.chat .message-body ul, +.chat .message-body ol { + margin-top: 1.25em !important; + margin-bottom: 1.25em !important; } -.message-body p, .chat .message-body ul, .chat .message-body ol { - margin-bottom: 16px !important; +.chat .message-body p:first-child, +.chat .message-body ul:first-child, +.chat .message-body ol:first-child { + margin-top: 0 !important; } -.message-body p:last-child, .chat .message-body ul:last-child, .chat .message-body ol:last-child { +.chat .message-body p:last-child, +.chat .message-body ul:last-child, +.chat .message-body ol:last-child { margin-bottom: 0 !important; } -.gradio-container .chat .assistant-message { - padding: 20px; +.chat .message-body li { + margin-top: 1.25em !important; + margin-bottom: 1.25em !important; +} + +.user-message, .assistant-message { + font-family: Inter, Helvetica, Arial, sans-serif; +} + +.message:first-child { + padding-top: 0; +} + +.username { + display: none; +} + +.chat .user-message { + padding: 1.5rem 1rem; + border-radius: 0; + border-bottom-right-radius: 0; +} + +.chat .assistant-message { background: #f4f4f4; - margin-top: 9px !important; - margin-bottom: 12px !important; - border-radius: 7px; - border: 1px solid var(--border-color-primary); + padding: 1.5rem 1rem; + border-radius: 0; + border: 0; +} + +.dark .chat .user-message { + background: transparent; } .dark .chat .assistant-message { - background: var(--color-grey-800); + background: var(--light-gray); } -.gradio-container .chat .user-message { - padding: 20px; - padding-left: 0; - padding-right: 0; - background-color: transparent; - border-radius: 8px; - border-bottom-right-radius: 0; +.chat .user-message .text, +.chat .assistant-message .text { + max-width: 40.25rem; + margin-left: auto; + margin-right: auto; } -.gradio-container .chat .assistant-message:last-child, .gradio-container .chat .user-message:last-child { - margin-bottom: 0 !important; +/* Create space between two assistant messages in a row */ +.assistant-message + .assistant-message { + margin-top: 1.5rem; } -code { +pre > code { background-color: #f3f4f6 !important; } -.dark code { +.dark pre > code { background-color: #1f2937 !important; } diff --git a/css/main.css b/css/main.css index cf3babdba6..fef3d3f1bf 100644 --- a/css/main.css +++ b/css/main.css @@ -1,7 +1,46 @@ +:root { + --darker-gray: #202123; + --dark-gray: #343541; + --light-gray: #444654; + --light-theme-gray: #f4f4f4; + --border-color-dark: #525252; + --header-width: 112px; + --selected-item-color-dark: #32333e; +} + +@font-face { + font-family: Inter; + src: url('file/css/Inter/Inter-VariableFont_opsz,wght.ttf') format('truetype'); + font-weight: 100 900; + font-style: normal; +} + +@font-face { + font-family: Inter; + src: url('file/css/Inter/Inter-Italic-VariableFont_opsz,wght.ttf') format('truetype'); + font-weight: 100 900; + font-style: italic; +} + .tabs.svelte-710i53 { margin-top: 0 } +.padded.svelte-12cmxck { + padding: 3px 0; +} + +div.svelte-sfqy0y, +div.svelte-iyf88w { + background: transparent; + border: 0; +} + +/* "info" messages without a title above */ +.block > .svelte-e8n7p6:not(:only-of-type, #chat-mode *) { + margin-bottom: 2px; +} + .py-6 { padding-top: 2.5rem } @@ -19,7 +58,7 @@ height: 39.594px; align-self: end; line-height: 1em; - border-radius: 0.5em; + border-radius: 0.375rem; flex: none; } @@ -46,10 +85,6 @@ min-height: 0 } -.dark svg { - fill: white; -} - .dark a { color: white !important; } @@ -62,14 +97,20 @@ ol li p, ul li p { border: 0; } +#default-tab, #notebook-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab { + padding: 1rem; +} + .gradio-container { max-width: 100% !important; padding-top: 0 !important; } #extensions { - margin-top: 5px; - margin-bottom: 35px; + margin: 5px auto 35px; + max-width: 880px; + padding: 1em; + padding-left: calc(var(--header-width) + 1em); } .extension-tab { @@ -86,20 +127,29 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { } gradio-app > :first-child { - padding-left: var(--size-4) !important; - padding-right: var(--size-4) !important; + padding: 0 !important; } .header_bar { - background-color: #f4f4f4; box-shadow: 0 0 3px rgba(22 22 22 / 35%); margin-bottom: 0; overflow-x: scroll; - margin-left: calc(-1 * var(--size-4)); - margin-right: calc(-1 * var(--size-4)); - display: block !important; text-wrap: nowrap; z-index: 90; + position: fixed; + display: flex !important; + flex-direction: column; + height: 100dvh; + width: var(--header-width); +} + +.header_bar button { + margin: 0; + padding: 0.75rem; +} + +.header_bar button.selected { + border: 0; } .dark .header_bar { @@ -113,23 +163,23 @@ gradio-app > :first-child { } .textbox_default textarea { - height: calc(100dvh - 271px); + height: calc(100dvh - 201px); } .textbox_default_output textarea { - height: calc(100dvh - 185px); + height: calc(100dvh - 117px); } .textbox textarea { - height: calc(100dvh - 241px); + height: calc(100dvh - 172px); } .textbox_logits textarea { - height: calc(100dvh - 236px); + height: calc(100dvh - 205px); } .textbox_logits_notebook textarea { - height: calc(100dvh - 292px); + height: calc(100dvh - 221px); } .monospace textarea { @@ -149,24 +199,6 @@ gradio-app > :first-child { color: #efefef !important; } -@media screen and (width <= 711px) { - .textbox_default textarea { - height: calc(100dvh - 259px); - } - - div .default-token-counter { - top: calc( 0.5 * (100dvh - 236px) ) !important; - } - - .transparent-substring { - display: none; - } - - .hover-menu { - min-width: 250px !important; - } -} - /* Hide the gradio footer */ footer { display: none !important; @@ -227,11 +259,13 @@ button { .pretty_scrollbar::-webkit-scrollbar-thumb, .pretty_scrollbar::-webkit-scrollbar-thumb:hover { background: var(--neutral-300); + border-radius: 30px; } .dark .pretty_scrollbar::-webkit-scrollbar-thumb, .dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover { - background: var(--neutral-700); + background: #ccc; + border-radius: 10px; } .pretty_scrollbar::-webkit-resizer { @@ -239,7 +273,8 @@ button { } .dark .pretty_scrollbar::-webkit-resizer { - background: #374151; + background: #ccc; + border-radius: 10px; } .pretty_scrollbar::-webkit-scrollbar-corner { @@ -251,20 +286,26 @@ audio { } /* Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui */ -.token-counter { +#default-token-counter, #notebook-token-counter { position: absolute !important; - top: calc( 0.5 * (100dvh - 218px) ) !important; - right: 2px; z-index: 100; background: var(--input-background-fill) !important; min-height: 0 !important; + width: 0; + text-align: left; + direction: rtl; + right: 5px; +} + +#default-token-counter { + top: calc(100dvh - 200px) !important; } -.default-token-counter { - top: calc( 0.5 * (100dvh - 248px) ) !important; +#notebook-token-counter { + top: calc(100dvh - 171px) !important; } -.token-counter span { +#default-token-counter span, #notebook-token-counter span { padding: 1px; box-shadow: 0 0 0 0.3em rgb(192 192 192 / 15%), inset 0 0 0.6em rgb(192 192 192 / 7.5%); border: 2px solid rgb(192 192 192 / 40%) !important; @@ -272,15 +313,15 @@ audio { } .no-background { - background: var(--background-fill-primary) !important; + background: transparent; padding: 0 !important; } /* ---------------------------------------------- Chat tab ---------------------------------------------- */ -.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx { - height: 66.67vh +.h-\[40dvh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx { + height: 66.67dvh } .gradio-container { @@ -310,7 +351,13 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } #chat-tab { - padding-top: 0; + padding: 0; +} + +#chat-tab > :nth-child(1) { + display: flex; + flex-direction: row; + gap: 0; } #chat-tab button#Generate, #chat-tab button#stop { @@ -322,7 +369,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } #chat-tab > :first-child, #extensions { - max-width: 880px; margin-left: auto; margin-right: auto; } @@ -342,61 +388,49 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { .chat { margin-left: auto; margin-right: auto; - max-width: 880px; min-height: var(--chat-height); overflow-y: auto; - padding-right: 15px; display: flex; flex-direction: column; word-break: break-word; overflow-wrap: anywhere; border-top: none; - border-radius: 0 0 0 8px; + border-radius: 0; visibility: visible; } .chat-parent { - height: calc(100dvh - 98px - var(--header-height) - var(--input-delta)); + height: calc(100dvh - 98px - var(--input-delta)); overflow: auto !important; border-radius: 0 !important; margin-bottom: var(--input-delta) !important; } -/* On desktop, automatically hide the chat scroll bar - * when not hovered. */ -@media (hover: hover) and (pointer: fine) { - .chat-parent { - visibility: hidden; - } - - .chat-parent:focus, .chat-parent:hover { - visibility: visible; - } -} - .chat-parent .prose { visibility: visible; } -.old-ui .chat-parent { - height: calc(100dvh - 192px - var(--header-height) - var(--input-delta)); - margin-bottom: var(--input-delta) !important; +.chat .message { + width: min(100%, 48rem); + margin-left: auto; + margin-right: auto; + text-align: start; + padding-left: 1rem; + padding-right: 1rem; } .chat-parent.bigchat { - height: calc(100dvh - 98px - var(--header-height) - var(--input-delta)) !important; + height: calc(100dvh - 98px - var(--input-delta)) !important; margin-bottom: var(--input-delta) !important; } .chat > .messages { display: flex; flex-direction: column; - padding-top: 25px; } -.chat .message:last-child { - margin-bottom: 0 !important; - padding-bottom: 15px !important; +.chat > .messages > :first-child { + padding-top: 20px; } .message-body h1, @@ -404,7 +438,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { .message-body h3, .message-body h4 { color: var(--body-text-color); - margin: 20px 0 10px 0; + margin: 20px 0 10px; } .dark .message q { @@ -423,12 +457,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { padding-inline-start: 2em; } -.message-body li:not(:last-child) { - margin-top: 0 !important; - margin-bottom: 2px !important; +.chat .message-body li:not(:last-child) { + margin-top: 0; + margin-bottom: 2px; } -.message-body li:last-child { +.chat .message-body li:last-child { margin-bottom: 0 !important; } @@ -456,7 +490,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { overflow: scroll; } -.message-body code { +.prose ul ul { + margin: 0; +} + +.message-body pre > code { white-space: pre-wrap !important; word-wrap: break-word !important; border: 1px solid #666; @@ -467,7 +505,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { color: #1f2328; } -.dark .message-body code { +.dark .message-body pre > code { background: #0d1117 !important; color: rgb(201 209 217); } @@ -477,8 +515,18 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { padding: 15px; } +.message-body :not(pre) > code::before { + content: "`"; +} + +.message-body :not(pre) > code::after { + content: "`"; +} + .message-body :not(pre) > code { white-space: normal !important; + font-weight: bold; + font-family: unset; } #chat-input { @@ -488,6 +536,15 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { border: none; } +#chat-input textarea { + padding: 0.65rem 2.5rem; +} + +#chat-input textarea::placeholder { + white-space: nowrap; + overflow: hidden; +} + #chat-input textarea:focus { box-shadow: none !important; } @@ -500,6 +557,14 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { display: none; } +.chat-input-positioned { + position: absolute; + bottom: 0; + max-width: 54rem; + left: 50%; + transform: translateX(-50%); +} + @media print { body { visibility: hidden; @@ -535,7 +600,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { #show-controls { position: absolute; - height: 100%; background-color: transparent; border: 0 !important; border-radius: 0; @@ -544,7 +608,8 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { #show-controls label { z-index: 1000; position: absolute; - right: 0; + right: 30px; + top: 10px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; @@ -626,7 +691,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { position: absolute; bottom: 80%; left: 0; - background-color: var(--background-fill-primary); box-shadow: 0 0 5px rgb(0 0 0 / 25%); z-index: 10000; min-width: 330px; @@ -637,7 +701,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { width: 100%; background: transparent !important; border-radius: 0 !important; - border-color: var(--border-color-primary); justify-content: space-between; margin: 0 !important; height: 36px; @@ -659,7 +722,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { opacity: 0.333; } -#chat-tab:not(.old-ui) #chat-buttons { +#chat-tab #chat-buttons { display: none !important; } @@ -690,23 +753,37 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } #chat-input-row { - padding-bottom: 20px; + padding-bottom: 1.5em; + padding-left: 1rem; + padding-right: 1rem; } -.old-ui #chat-input-row, #chat-input-row.bigchat { - padding-bottom: 0 !important; +#chat-input-row.bigchat { + padding-bottom: 1px !important; } #chat-col { padding-bottom: 100px; } -.old-ui #chat-col, #chat-col.bigchat { - padding-bottom: 80px !important; +@media screen and (width <= 924px) { + #chat-col { + padding-bottom: 100px; + margin-top: 32px; + position: relative; /* Ensure positioning for the pseudo-element */ + } + + .chat-parent { + height: calc(100dvh - 98px - var(--input-delta) - 32px); + } + + .chat-parent.bigchat { + height: calc(100dvh - 98px - var(--input-delta) - 32px) !important; + } } -.old-ui #chat-buttons #clear-history-confirm { - order: -1; +#chat-col.bigchat { + padding-bottom: 80px !important; } .chat ol, .chat ul { @@ -721,26 +798,37 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } /* ---------------------------------------------- - Past chat histories in a side bar on desktop + Create the sidebars ---------------------------------------------- */ -@media screen and (width >= 1327px) { - #past-chats-row { - position: absolute; - top: 36px; - left: 0; - width: calc(0.5*(var(--document-width) - 880px - 120px - 16px*2)); - max-width: 300px; - margin-left: calc(-0.5*(var(--document-width) - 880px - 14px - 16px * 2)); - } +#chat-controls, +#past-chats-row { + width: 260px; + max-width: 80vw; + padding: 0.5rem; + height: 100dvh; + flex-shrink: 0; + box-sizing: content-box; + z-index: 10; +} - #chat-controls { - position: absolute; - top: 16px; - right: 0; - width: calc(0.5*(var(--document-width) - 880px - 120px - 16px*2)); - max-width: 400px; - margin-right: calc(-0.5*(var(--document-width) - 880px - 14px - 16px * 2)); - } +#past-chats-row:not(.negative-header) { + max-width: calc(85vw - var(--header-width)); +} + +#chat-controls { + padding: 1rem; + padding-bottom: 0; + overflow-y: scroll; +} + +#chat-controls > :nth-child(1) { + padding: 0.5rem; +} + +#past-chats-row + * { + width: unset; + flex-grow: 1; + flex-shrink: 1; } /* ---------------------------------------------- @@ -748,6 +836,8 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { ---------------------------------------------- */ .options { z-index: 100 !important; + border: 1px solid var(--input-border-color); + border-radius: 0; } /* ---------------------------------------------- @@ -757,12 +847,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { position: fixed; bottom: 0; left: 0; - width: calc((100vw - 880px - 120px) /2); + width: calc(100vw / 2 - 600px); + z-index: 10000; } .pfp_character { position: relative; - z-index: 100; } .pfp_character:hover { @@ -776,10 +866,14 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } #past-chats { - max-height: calc(100vh - 195px); + max-height: calc(100dvh - 90px); overflow-y: scroll !important; border-radius: 0; - scrollbar-width: none; /* Hide scrollbar in Firefox by default */ + scrollbar-width: auto; +} + +#past-chats::-webkit-scrollbar { + display: block; } #past-chats label { @@ -790,6 +884,24 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { border-radius: 0; padding-top: 8px; padding-bottom: 8px; + position: relative; + min-height: 42px !important; +} + +#past-chats label::before { + content: url('data:image/svg+xml;utf8,'); + position: absolute; + top: 12px; + left: 12px; + margin-right: 8px; +} + +.dark #past-chats label::before { + content: url('data:image/svg+xml;utf8,'); +} + +#past-chats label span { + margin-left: 29px; } #past-chats > :nth-child(2) { @@ -797,23 +909,260 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } #past-chats > :nth-child(3) { - gap: 0; + gap: 0.25rem; } -#past-chats::-webkit-scrollbar { +#past-chats input { display: none; } -#past-chats:hover { - scrollbar-width: auto; +#past-chats label { + padding: 0.75rem; + font-size: 12.5px; + font-weight: 400; } -#past-chats:hover::-webkit-scrollbar { - display: block; +#past-chats .selected, +#past-chats label:hover { + border-radius: 0.5rem; +} + +#past-chats label:hover { + cursor: pointer; +} + +#past-chats-buttons, +#delete-chat-row, +#rename-row { + width: 100%; + justify-content: center; +} + + +#past-chats-row, +#chat-controls { + width: 260px; + padding: 0.5rem; + height: calc(100dvh - 16px); + flex-shrink: 0; + box-sizing: content-box; } -@media screen and (width < 1327px) { - #past-chats { - max-height: 300px; +.sidebar-hidden { + width: 0 !important; + padding: 0 !important; + overflow: hidden; +} + +#past-chats-toggle, +#chat-controls-toggle, +#navigation-toggle { + display: flex; + align-items: center; + justify-content: center; + cursor: pointer; + user-select: none; + border-radius: 3px; + z-index: 1000; + position: fixed; + width: 2rem; + height: 2rem; + top: 0; +} + +#past-chats-toggle svg, +#chat-controls-toggle svg, +#navigation-toggle svg { + pointer-events: none; +} + +@media screen and (width <= 408px) { + #past-chats-toggle.past-chats-open { + top: 28px; + } + + #chat-controls-toggle.chat-controls-open { + top: 28px; + right: calc(16px + min(260px, 80vw)) !important; + } +} + +#past-chats-toggle.past-chats-open.negative-header { + left: calc(min(260px, 85vw) + 16px); +} + +#past-chats-toggle.past-chats-open:not(.negative-header) { + left: calc(112px + min(260px, calc(85vw - var(--header-width))) + 16px); +} + +#past-chats-toggle.past-chats-closed:not(.negative-header) { + left: 112px; +} + +#past-chats-toggle.past-chats-closed.negative-header { + left: 0; + top: 28px; +} + +@media screen and (width <= 924px) { + #past-chats-toggle.past-chats-closed.negative-header { + left: 28px; + top: 0; + } +} + +.header_bar ~ * { + margin-left: var(--header-width); +} + +/* Positions for chat-controls-toggle */ +#chat-controls-toggle.chat-controls-open { + right: calc(min(260px, 80vw) + 23px); +} + +#chat-controls-toggle.chat-controls-closed { + right: 7px; +} + +@media screen and (width <= 924px) { + #chat-controls.sidebar-shown { + position: fixed; + right: 0; + } + + #past-chats-row.sidebar-shown { + position: fixed; + } +} + +/* ---------------------------------------------- + Dark theme +---------------------------------------------- */ +.dark .header_bar { + background-color: var(--darker-gray) !important; +} + +.dark .header_bar button.selected { + background: var(--selected-item-color-dark); +} + +.dark #chat-input textarea { + background: var(--light-gray); + color: white !important; + border-color: #292c3b; +} + +.dark #chat-input textarea::placeholder { + color: #9ca3af; +} + +.dark .hover-menu { + background-color: var(--darker-gray); +} + +.dark .hover-menu button { + border-color: var(--border-color-primary); +} + +.dark #chat-controls, +.dark #past-chats-row { + background-color: var(--darker-gray); + border: 0 !important; +} + +.dark #past-chats .selected, +.dark #past-chats label:hover { + background-color: var(--selected-item-color-dark) !important; +} + +.dark #past-chats-row, +.dark #chat-controls { + background-color: var(--darker-gray); +} + +.dark #past-chats-toggle, +.dark #chat-controls-toggle, +.dark #navigation-toggle { + color: white; +} + +.dark svg { + fill: white; + color: white; +} + +@media screen and (width <= 408px) { + .dark #past-chats-toggle.past-chats-open { + background: var(--darker-gray); + } + + .dark #chat-controls-toggle.chat-controls-open { + background: var(--darker-gray); + } +} + +/* ---------------------------------------------- + Light theme +---------------------------------------------- */ +.header_bar { + background-color: var(--light-theme-gray) !important; +} + +.header_bar button.selected { + background: white; +} + +#chat-controls, +#past-chats-row { + background-color: var(--light-theme-gray); +} + +#chat-controls { + border-left: 1px solid #d9d9d0; +} + +#past-chats-row { + border-right: 1px solid #d9d9d0; +} + +#past-chats-toggle, +#chat-controls-toggle, +#navigation-toggle { + color: gray !important; +} + +.mobile-top-bar { + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 32px; + z-index: 2; + opacity: 0; + pointer-events: none; +} + +@media screen and (width <= 924px) { + .mobile-top-bar { + opacity: 1; + pointer-events: auto; + } + + .dark .mobile-top-bar { + background-color: var(--darker-gray); + } + + .mobile-top-bar { + background-color: var(--light-theme-gray); + } +} + +@media screen and (width <= 408px) { + #past-chats-toggle.past-chats-open { + background: var(--light-theme-gray); + } + + #chat-controls-toggle.chat-controls-open { + background: var(--light-theme-gray); } } diff --git a/extensions/coqui_tts/requirements.txt b/extensions/coqui_tts/requirements.txt index 747f99a068..b0b691e8fe 100644 --- a/extensions/coqui_tts/requirements.txt +++ b/extensions/coqui_tts/requirements.txt @@ -1 +1 @@ -TTS==0.21.* \ No newline at end of file +coqui-tts==0.25.1 diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 6bd8f409a0..2cefc22bde 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -143,21 +143,20 @@ def convert_history(history): new_history = [] for entry in history: if isinstance(entry['content'], list): - image_url = None - content = None for item in entry['content']: if not isinstance(item, dict): continue - + + image_url = None + content = None if item['type'] == 'image_url' and isinstance(item['image_url'], dict): image_url = item['image_url']['url'] elif item['type'] == 'text' and isinstance(item['text'], str): content = item['text'] - - if image_url: - new_history.append({"image_url": image_url, "role": "user"}) - if content: - new_history.append({"content": content, "role": "user"}) + if image_url: + new_history.append({"image_url": image_url, "role": "user"}) + if content: + new_history.append({"content": content, "role": "user"}) else: new_history.append(entry) diff --git a/js/main.js b/js/main.js index 3028afac1f..a8018175db 100644 --- a/js/main.js +++ b/js/main.js @@ -18,16 +18,18 @@ document.querySelector(".header_bar").addEventListener("click", function(event) if (extensionsVisible) { if (extensions) { extensions.style.display = "flex"; - extensions.style.maxWidth = chatVisible ? "880px" : "none"; - extensions.style.padding = chatVisible ? "0px" : "15px"; } + this.style.marginBottom = chatVisible ? "0px" : "19px"; if (chatVisible && !showControlsChecked) { - document.querySelectorAll("#chat-tab > div > :nth-child(n+2), #extensions").forEach(element => { + document.querySelectorAll( + "#chat-tab > div > :nth-child(1), #chat-tab > div > :nth-child(3), #chat-tab > div > :nth-child(4), #extensions" + ).forEach(element => { element.style.display = "none"; }); } + } else { this.style.marginBottom = "19px"; if (extensions) extensions.style.display = "none"; @@ -132,8 +134,7 @@ targetElement.addEventListener("scroll", function() { const observer = new MutationObserver(function(mutations) { updateCssProperties(); - const firstChild = targetElement.children[0]; - if (firstChild.classList.contains("generating")) { + if (targetElement.classList.contains("_generating")) { typing.parentNode.classList.add("visible-dots"); document.getElementById("stop").style.display = "flex"; document.getElementById("Generate").style.display = "none"; @@ -255,7 +256,7 @@ for (i = 0; i < slimDropdownElements.length; i++) { // The show/hide events were adapted from: // https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js //------------------------------------------------ -var buttonsInChat = document.querySelectorAll("#chat-tab:not(.old-ui) #chat-buttons button"); +var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button"); var button = document.getElementById("hover-element-button"); var menu = document.getElementById("hover-menu"); var istouchscreen = (navigator.maxTouchPoints > 0) || "ontouchstart" in document.documentElement; @@ -290,12 +291,6 @@ if (buttonsInChat.length > 0) { thisButton.innerHTML = newText; } } -} else { - buttonsInChat = document.querySelectorAll("#chat-tab.old-ui #chat-buttons button"); - for (let i = 0; i < buttonsInChat.length; i++) { - buttonsInChat[i].textContent = buttonsInChat[i].textContent.replace(/ \(.*?\)/, ""); - } - document.getElementById("gr-hover-container").style.display = "none"; } function isMouseOverButtonOrMenu() { @@ -339,6 +334,8 @@ menu.addEventListener("mouseleave", function () { // Add event listener for click anywhere in the document document.addEventListener("click", function (event) { + const target = event.target; + // Check if the click is outside the button/menu and the menu is visible if (!isMouseOverButtonOrMenu() && menu.style.display === "flex") { hideMenu(); @@ -347,6 +344,21 @@ document.addEventListener("click", function (event) { if (event.target.classList.contains("pfp_character")) { toggleBigPicture(); } + + // Handle sidebar clicks on mobile + if (isMobile()) { + // Check if the click did NOT originate from any of the specified toggle buttons or elements + if ( + target.closest("#navigation-toggle") !== navigationToggle && + target.closest("#past-chats-toggle") !== pastChatsToggle && + target.closest("#chat-controls-toggle") !== chatControlsToggle && + target.closest(".header_bar") !== headerBar && + target.closest("#past-chats-row") !== pastChatsRow && + target.closest("#chat-controls") !== chatControlsRow + ) { + handleIndividualSidebarClose(event); + } + } }); //------------------------------------------------ @@ -361,10 +373,9 @@ for (var i = 0; i < 2; i++) { parent.insertBefore(elementToMove, parent.firstChild); //------------------------------------------------ -// Make the chat input grow upwards instead of downwards +// Position the chat input //------------------------------------------------ -document.getElementById("show-controls").parentNode.style.position = "absolute"; -document.getElementById("show-controls").parentNode.style.bottom = "0px"; +document.getElementById("show-controls").parentNode.classList.add("chat-input-positioned"); //------------------------------------------------ // Focus on the chat input @@ -444,20 +455,10 @@ function updateCssProperties() { // Check if the chat container is visible if (chatContainer.clientHeight > 0) { - var numericHeight = chatContainer.parentNode.clientHeight - chatInputHeight + 40 - 100; - if (document.getElementById("chat-tab").style.paddingBottom != "") { - numericHeight += 20; - } - - const newChatHeight = `${numericHeight}px`; + const newChatHeight = `${chatContainer.parentNode.clientHeight - chatInputHeight + 40 - 100 - 20}px`; document.documentElement.style.setProperty("--chat-height", newChatHeight); document.documentElement.style.setProperty("--input-delta", `${chatInputHeight - 40}px`); - // Get and set header height - const header = document.querySelector(".header_bar"); - const headerHeight = `${header.clientHeight}px`; - document.documentElement.style.setProperty("--header-height", headerHeight); - // Adjust scrollTop based on input height change if (chatInputHeight !== currentChatInputHeight) { if (!isScrolled && chatInputHeight < currentChatInputHeight) { @@ -477,18 +478,6 @@ new ResizeObserver(updateCssProperties).observe(document.querySelector("#chat-in // Handle changes in window size window.addEventListener("resize", updateCssProperties); -//------------------------------------------------ -// Keep track of the display width to position the past -// chats dropdown on desktop -//------------------------------------------------ -function updateDocumentWidth() { - var updatedWidth = window.innerWidth || document.documentElement.clientWidth || document.body.clientWidth; - document.documentElement.style.setProperty("--document-width", updatedWidth + "px"); -} - -updateDocumentWidth(); -window.addEventListener("resize", updateDocumentWidth); - //------------------------------------------------ // Focus on the rename text area when it becomes visible //------------------------------------------------ @@ -568,6 +557,8 @@ function moveToChatTab() { grandParent.style.display = "none"; } + grandParent.children[0].style.minWidth = "100%"; + const chatControlsFirstChild = document.querySelector("#chat-controls").firstElementChild; const newParent = chatControlsFirstChild; let newPosition = newParent.children.length - 2; @@ -586,6 +577,7 @@ function restoreOriginalPosition() { document.getElementById("save-character").style.display = ""; movedElement.style.display = ""; + movedElement.children[0].style.minWidth = ""; } } @@ -612,3 +604,222 @@ window.addEventListener("beforeunload", function (event) { }); moveToChatTab(); + +//------------------------------------------------ +// Buttons to toggle the sidebars +//------------------------------------------------ + +const leftArrowSVG = ` + + + + + +`; + +const rightArrowSVG = ` + + + + + +`; + +const hamburgerMenuSVG = ` + + + + +`; + +const closeMenuSVG = ` + + + +`; + +const chatTab = document.getElementById("chat-tab"); +const pastChatsRow = document.getElementById("past-chats-row"); +const chatControlsRow = document.getElementById("chat-controls"); + +if (chatTab) { + // Create past-chats-toggle div + const pastChatsToggle = document.createElement("div"); + pastChatsToggle.id = "past-chats-toggle"; + pastChatsToggle.innerHTML = leftArrowSVG; // Set initial icon to left arrow + pastChatsToggle.classList.add("past-chats-open"); // Set initial position + + // Create chat-controls-toggle div + const chatControlsToggle = document.createElement("div"); + chatControlsToggle.id = "chat-controls-toggle"; + chatControlsToggle.innerHTML = rightArrowSVG; // Set initial icon to right arrow + chatControlsToggle.classList.add("chat-controls-open"); // Set initial position + + // Append both elements to the chat-tab + chatTab.appendChild(pastChatsToggle); + chatTab.appendChild(chatControlsToggle); +} + +// Create navigation toggle div +const navigationToggle = document.createElement("div"); +navigationToggle.id = "navigation-toggle"; +navigationToggle.innerHTML = leftArrowSVG; // Set initial icon to right arrow +navigationToggle.classList.add("navigation-left"); // Set initial position +headerBar.appendChild(navigationToggle); + +// Retrieve the dynamically created toggle buttons +const pastChatsToggle = document.getElementById("past-chats-toggle"); +const chatControlsToggle = document.getElementById("chat-controls-toggle"); + +function handleIndividualSidebarClose(event) { + const target = event.target; + + // Close navigation bar if click is outside and it is open + if (!headerBar.contains(target) && !headerBar.classList.contains("sidebar-hidden")) { + toggleSidebar(headerBar, navigationToggle, true); + } + + // Close past chats row if click is outside and it is open + if (!pastChatsRow.contains(target) && !pastChatsRow.classList.contains("sidebar-hidden")) { + toggleSidebar(pastChatsRow, pastChatsToggle, true); + } + + // Close chat controls row if click is outside and it is open + if (!chatControlsRow.contains(target) && !chatControlsRow.classList.contains("sidebar-hidden")) { + toggleSidebar(chatControlsRow, chatControlsToggle, true); + } +} + +function toggleSidebar(sidebar, toggle, forceClose = false) { + const isCurrentlyHidden = sidebar.classList.contains("sidebar-hidden"); + const shouldClose = !isCurrentlyHidden; + + // Apply visibility classes + sidebar.classList.toggle("sidebar-hidden", shouldClose); + sidebar.classList.toggle("sidebar-shown", !shouldClose); + + if (sidebar === headerBar) { + // Special handling for header bar + document.documentElement.style.setProperty("--header-width", shouldClose ? "0px" : "112px"); + pastChatsRow.classList.toggle("negative-header", shouldClose); + pastChatsToggle.classList.toggle("negative-header", shouldClose); + toggle.innerHTML = shouldClose ? hamburgerMenuSVG : closeMenuSVG; + } else if (sidebar === pastChatsRow) { + // Past chats sidebar + toggle.classList.toggle("past-chats-closed", shouldClose); + toggle.classList.toggle("past-chats-open", !shouldClose); + toggle.innerHTML = shouldClose ? rightArrowSVG : leftArrowSVG; + } else if (sidebar === chatControlsRow) { + // Chat controls sidebar + toggle.classList.toggle("chat-controls-closed", shouldClose); + toggle.classList.toggle("chat-controls-open", !shouldClose); + toggle.innerHTML = shouldClose ? leftArrowSVG : rightArrowSVG; + } + + // Mobile handling + if (isMobile()) { + sidebar.classList.toggle("sidebar-shown", !shouldClose); + } +} + +// Function to check if the device is mobile +function isMobile() { + return window.innerWidth <= 924; +} + +// Function to initialize sidebars +function initializeSidebars() { + const isOnMobile = isMobile(); + + if (isOnMobile) { + // Mobile state: Hide sidebars and set closed states + [pastChatsRow, chatControlsRow, headerBar].forEach(el => { + el.classList.add("sidebar-hidden"); + el.classList.remove("sidebar-shown"); + }); + + document.documentElement.style.setProperty("--header-width", "0px"); + pastChatsRow.classList.add("negative-header"); + pastChatsToggle.classList.add("negative-header", "past-chats-closed"); + pastChatsToggle.classList.remove("past-chats-open"); + + [chatControlsToggle, navigationToggle].forEach(el => { + el.classList.add("chat-controls-closed"); + el.classList.remove("chat-controls-open"); + }); + + pastChatsToggle.innerHTML = rightArrowSVG; + chatControlsToggle.innerHTML = leftArrowSVG; + navigationToggle.innerHTML = hamburgerMenuSVG; + } else { + // Desktop state: Show sidebars and set open states + [pastChatsRow, chatControlsRow].forEach(el => { + el.classList.remove("sidebar-hidden", "sidebar-shown"); + }); + + pastChatsToggle.classList.add("past-chats-open"); + pastChatsToggle.classList.remove("past-chats-closed"); + + [chatControlsToggle, navigationToggle].forEach(el => { + el.classList.add("chat-controls-open"); + el.classList.remove("chat-controls-closed"); + }); + + pastChatsToggle.innerHTML = leftArrowSVG; + chatControlsToggle.innerHTML = rightArrowSVG; + navigationToggle.innerHTML = closeMenuSVG; + } +} + +// Run the initializer when the page loads +initializeSidebars(); + +// Add click event listeners to toggle buttons +pastChatsToggle.addEventListener("click", () => { + toggleSidebar(pastChatsRow, pastChatsToggle); +}); + +chatControlsToggle.addEventListener("click", () => { + toggleSidebar(chatControlsRow, chatControlsToggle); +}); + +navigationToggle.addEventListener("click", () => { + toggleSidebar(headerBar, navigationToggle); +}); + +//------------------------------------------------ +// Fixes #chat-input textarea height issue +// for devices with width <= 924px +//------------------------------------------------ + +if (isMobile()) { + // Target the textarea + const textarea = document.querySelector("#chat-input textarea"); + + if (textarea) { + // Simulate adding and removing a newline + textarea.value += "\n"; + textarea.dispatchEvent(new Event("input", { bubbles: true })); + textarea.value = textarea.value.slice(0, -1); + textarea.dispatchEvent(new Event("input", { bubbles: true })); + } +} + +//------------------------------------------------ +// Create a top navigation bar on mobile +//------------------------------------------------ + +function createMobileTopBar() { + const chatTab = document.getElementById("chat-tab"); + + // Only create the top bar if it doesn't already exist + if (chatTab && !chatTab.querySelector(".mobile-top-bar")) { + const topBar = document.createElement("div"); + topBar.classList.add("mobile-top-bar"); + + // Insert the top bar as the first child of chat-tab + chatTab.appendChild(topBar); + } +} + +createMobileTopBar(); diff --git a/js/show_controls.js b/js/show_controls.js index 1ff88e52aa..1a87b52d96 100644 --- a/js/show_controls.js +++ b/js/show_controls.js @@ -1,4 +1,6 @@ -const belowChatInput = document.querySelectorAll("#chat-tab > div > :nth-child(n+2), #extensions"); +const belowChatInput = document.querySelectorAll( + "#chat-tab > div > :nth-child(1), #chat-tab > div > :nth-child(3), #chat-tab > div > :nth-child(4), #extensions" +); const chatParent = document.querySelector(".chat-parent"); function toggle_controls(value) { diff --git a/modules/block_requests.py b/modules/block_requests.py index 886930f0c0..6adc385a75 100644 --- a/modules/block_requests.py +++ b/modules/block_requests.py @@ -47,7 +47,7 @@ def my_open(*args, **kwargs): if len(args) > 1 and args[1] == 'rb': file_contents = file_contents.decode('utf-8') - file_contents = file_contents.replace('\t\t', '') + file_contents = file_contents.replace('\t\t', '') file_contents = file_contents.replace('cdnjs.cloudflare.com', '127.0.0.1') file_contents = file_contents.replace( '', diff --git a/modules/chat.py b/modules/chat.py index b81cfea6ee..92808fb7de 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -352,13 +352,17 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True, for_ui=for_ui)): # Extract the reply - visible_reply = reply if state['mode'] in ['chat', 'chat-instruct']: - visible_reply = re.sub("(||{{user}})", state['name1'], reply) + visible_reply = re.sub("(||{{user}})", state['name1'], reply + '❚') + else: + visible_reply = reply + '❚' visible_reply = html.escape(visible_reply) if shared.stop_everything: + if output['visible'][-1][1].endswith('❚'): + output['visible'][-1][1] = output['visible'][-1][1][:-1] + output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) yield output return @@ -374,6 +378,9 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if is_stream: yield output + if output['visible'][-1][1].endswith('❚'): + output['visible'][-1][1] = output['visible'][-1][1][:-1] + output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) yield output @@ -606,9 +613,9 @@ def find_all_histories_with_first_prompts(state): first_prompt = first_prompt.strip() - # Truncate the first prompt if it's longer than 32 characters - if len(first_prompt) > 32: - first_prompt = first_prompt[:29] + '...' + # Truncate the first prompt if it's longer than 30 characters + if len(first_prompt) > 30: + first_prompt = first_prompt[:30-3] + '...' result.append((first_prompt, filename)) @@ -1087,9 +1094,8 @@ def handle_delete_chat_confirm_click(state): def handle_rename_chat_click(): return [ - gr.update(visible=True, value="My New Chat"), + gr.update(value="My New Chat"), gr.update(visible=True), - gr.update(visible=True) ] @@ -1100,16 +1106,14 @@ def handle_rename_chat_confirm(rename_to, state): return [ gr.update(choices=histories, value=rename_to), gr.update(visible=False), - gr.update(visible=False), - gr.update(visible=False) ] def handle_upload_chat_history(load_chat_history, state): history = start_new_chat(state) history = load_history_json(load_chat_history, history) - histories = find_all_histories_with_first_prompts(state) save_history(history, state['unique_id'], state['character_menu'], state['mode']) + histories = find_all_histories_with_first_prompts(state) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) @@ -1209,7 +1213,7 @@ def handle_delete_template_click(template): return [ f"{template}.yaml", "instruction-templates/", - gr.update(visible=True) + gr.update(visible=False) ] diff --git a/modules/exllamav2.py b/modules/exllamav2.py index 0498c4882e..9b6da83c87 100644 --- a/modules/exllamav2.py +++ b/modules/exllamav2.py @@ -2,17 +2,19 @@ from pathlib import Path import torch + from exllamav2 import ( ExLlamaV2, ExLlamaV2Cache, ExLlamaV2Cache_8bit, ExLlamaV2Cache_Q4, + ExLlamaV2Cache_Q6, + ExLlamaV2Cache_Q8, ExLlamaV2Cache_TP, ExLlamaV2Config, ExLlamaV2Tokenizer ) from exllamav2.generator import ExLlamaV2Sampler, ExLlamaV2StreamingGenerator - from modules import shared from modules.logging_colors import logger from modules.text_generation import get_max_prompt_length @@ -57,12 +59,20 @@ def from_pretrained(self, path_to_model): model.load(split) # Determine the correct cache type - if shared.args.cache_8bit: + kv_cache_type = shared.args.cache_type.lower() + + if kv_cache_type == 'fp16': + cache_type = ExLlamaV2Cache + elif kv_cache_type == 'fp8': cache_type = ExLlamaV2Cache_8bit - elif shared.args.cache_4bit: + elif kv_cache_type == 'q8': + cache_type = ExLlamaV2Cache_Q8 + elif kv_cache_type == 'q6': + cache_type = ExLlamaV2Cache_Q6 + elif kv_cache_type == 'q4': cache_type = ExLlamaV2Cache_Q4 else: - cache_type = ExLlamaV2Cache + raise ValueError(f"Invalid cache type for ExLlamaV2: {cache_type}. Valid options are: fp16, fp8, q8, q6, q4.") # Use TP if specified if shared.args.enable_tp: diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py index 320a8d2467..62d1e0547c 100644 --- a/modules/exllamav2_hf.py +++ b/modules/exllamav2_hf.py @@ -4,18 +4,20 @@ from typing import Any, Dict, Optional, Union import torch +from torch.nn import CrossEntropyLoss +from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel +from transformers.modeling_outputs import CausalLMOutputWithPast + from exllamav2 import ( ExLlamaV2, ExLlamaV2Cache, ExLlamaV2Cache_8bit, ExLlamaV2Cache_Q4, + ExLlamaV2Cache_Q6, + ExLlamaV2Cache_Q8, ExLlamaV2Cache_TP, ExLlamaV2Config ) -from torch.nn import CrossEntropyLoss -from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel -from transformers.modeling_outputs import CausalLMOutputWithPast - from modules import shared from modules.logging_colors import logger @@ -45,12 +47,20 @@ def __init__(self, config: ExLlamaV2Config): self.ex_model.load(split) # Determine the correct cache type - if shared.args.cache_8bit: + kv_cache_type = shared.args.cache_type.lower() + + if kv_cache_type == 'fp16': + cache_type = ExLlamaV2Cache + elif kv_cache_type == 'fp8': cache_type = ExLlamaV2Cache_8bit - elif shared.args.cache_4bit: + elif kv_cache_type == 'q8': + cache_type = ExLlamaV2Cache_Q8 + elif kv_cache_type == 'q6': + cache_type = ExLlamaV2Cache_Q6 + elif kv_cache_type == 'q4': cache_type = ExLlamaV2Cache_Q4 else: - cache_type = ExLlamaV2Cache + raise ValueError(f"Invalid cache type for ExLlamaV2: {cache_type}. Valid options are: fp16, fp8, q8, q6, q4.") # Use TP if specified if shared.args.enable_tp: diff --git a/modules/html_generator.py b/modules/html_generator.py index d0afd6b213..01b2086610 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -104,6 +104,8 @@ def convert_to_markdown(string): result = '' is_code = False is_latex = False + previous_line_empty = True + for line in string.split('\n'): stripped_line = line.strip() @@ -120,13 +122,20 @@ def convert_to_markdown(string): elif stripped_line.endswith('\\\\]'): is_latex = False - result += line - - # Don't add an extra \n for tables, code, or LaTeX - if is_code or is_latex or line.startswith('|'): - result += '\n' + # Preserve indentation for lists and code blocks + if stripped_line.startswith('-') or stripped_line.startswith('*') or stripped_line.startswith('+') or stripped_line.startswith('>') or re.match(r'\d+\.', stripped_line): + result += line + '\n' + previous_line_empty = False + elif is_code or is_latex or line.startswith('|'): + result += line + '\n' + previous_line_empty = False else: - result += '\n\n' + if previous_line_empty: + result += line.strip() + '\n' + else: + result += line.strip() + '\n\n' + + previous_line_empty = stripped_line == '' result = result.strip() if is_code: @@ -145,14 +154,15 @@ def convert_to_markdown(string): result = re.sub(list_item_pattern, r'\g<1> ' + delete_str, result) # Convert to HTML using markdown - html_output = markdown.markdown(result, extensions=['fenced_code', 'tables']) + html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'], tab_length=2) # Remove the delete string from the HTML output pos = html_output.rfind(delete_str) if pos > -1: html_output = html_output[:pos] + html_output[pos + len(delete_str):] else: - html_output = markdown.markdown(result, extensions=['fenced_code', 'tables']) + # Convert to HTML using markdown + html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'], tab_length=2) # Unescape code blocks pattern = re.compile(r']*>(.*?)', re.DOTALL) diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py index 6611a7c1a8..f9964fe8b0 100644 --- a/modules/llamacpp_hf.py +++ b/modules/llamacpp_hf.py @@ -9,6 +9,7 @@ from modules import shared from modules.llama_cpp_python_hijack import llama_cpp_lib +from modules.llamacpp_model import get_llamacpp_cache_type_for_string from modules.logging_colors import logger @@ -196,12 +197,9 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P 'flash_attn': shared.args.flash_attn } - if shared.args.cache_4bit: - params["type_k"] = 2 - params["type_v"] = 2 - elif shared.args.cache_8bit: - params["type_k"] = 8 - params["type_v"] = 8 + if shared.args.cache_type != 'fp16': + params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type) + params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type) Llama = llama_cpp_lib().Llama model = Llama(**params) diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py index 96f7ed56b5..6a76ee4e95 100644 --- a/modules/llamacpp_model.py +++ b/modules/llamacpp_model.py @@ -10,6 +10,35 @@ from modules.logging_colors import logger from modules.text_generation import get_max_prompt_length +llamacpp_quant_mapping = { + 'f32': 0, + 'fp16': 1, + 'q4_0': 2, + 'q4_1': 3, + 'q5_0': 6, + 'q5_1': 7, + 'q8_0': 8, + 'q8_1': 9, + 'q2_k': 10, + 'q3_k': 11, + 'q4_k': 12, + 'q5_k': 13, + 'q6_k': 14, + 'q8_k': 15, + 'iq4_nl': 20, + 'bf16': 30, +} + +llamacpp_valid_cache_types = {'fp16', 'q8_0', 'q4_0'} + + +def get_llamacpp_cache_type_for_string(quant_type: str): + quant_type = quant_type.lower() + if quant_type in llamacpp_valid_cache_types: + return llamacpp_quant_mapping[quant_type] + else: + raise ValueError(f"Invalid cache type for llama.cpp: {quant_type}. Valid options are: fp16, q8_0, q4_0.") + def ban_eos_logits_processor(eos_token, input_ids, logits): logits[eos_token] = -float('inf') @@ -75,12 +104,9 @@ def from_pretrained(self, path): 'flash_attn': shared.args.flash_attn } - if shared.args.cache_4bit: - params["type_k"] = 2 - params["type_v"] = 2 - elif shared.args.cache_8bit: - params["type_k"] = 8 - params["type_v"] = 8 + if shared.args.cache_type != 'fp16': + params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type) + params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type) result.model = Llama(**params) if cache_capacity > 0: diff --git a/modules/loaders.py b/modules/loaders.py index deee00a7f9..4cb7e349d6 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -31,8 +31,7 @@ 'llama.cpp': [ 'n_ctx', 'n_gpu_layers', - 'cache_8bit', - 'cache_4bit', + 'cache_type', 'tensor_split', 'n_batch', 'threads', @@ -54,8 +53,7 @@ 'llamacpp_HF': [ 'n_ctx', 'n_gpu_layers', - 'cache_8bit', - 'cache_4bit', + 'cache_type', 'tensor_split', 'n_batch', 'threads', @@ -87,8 +85,7 @@ 'no_xformers', 'no_sdpa', 'num_experts_per_token', - 'cache_8bit', - 'cache_4bit', + 'cache_type', 'autosplit', 'enable_tp', 'alpha_value', @@ -103,8 +100,7 @@ 'no_xformers', 'no_sdpa', 'num_experts_per_token', - 'cache_8bit', - 'cache_4bit', + 'cache_type', 'autosplit', 'enable_tp', 'alpha_value', diff --git a/modules/shared.py b/modules/shared.py index 894ed6fe56..cab612268a 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -81,7 +81,6 @@ group.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See settings-template.yaml for an example. If you create a file called settings.yaml, this file will be loaded by default without the need to use the --settings flag.') group.add_argument('--extensions', type=str, nargs='+', help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.') group.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') -group.add_argument('--chat-buttons', action='store_true', help='Show buttons on the chat tab instead of a hover menu.') group.add_argument('--idle-timeout', type=int, default=0, help='Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.') # Model loader @@ -143,8 +142,6 @@ group.add_argument('--no_flash_attn', action='store_true', help='Force flash-attention to not be used.') group.add_argument('--no_xformers', action='store_true', help='Force xformers to not be used.') group.add_argument('--no_sdpa', action='store_true', help='Force Torch SDPA to not be used.') -group.add_argument('--cache_8bit', action='store_true', help='Use 8-bit cache to save VRAM.') -group.add_argument('--cache_4bit', action='store_true', help='Use Q4 cache to save VRAM.') group.add_argument('--num_experts_per_token', type=int, default=2, help='Number of experts to use for generation. Applies to MoE models like Mixtral.') group.add_argument('--enable_tp', action='store_true', help='Enable Tensor Parallelism (TP) in ExLlamaV2.') @@ -167,6 +164,10 @@ group = parser.add_argument_group('TensorRT-LLM') group.add_argument('--cpp-runner', action='store_true', help='Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn\'t support streaming yet.') +# Cache +group = parser.add_argument_group('Cache') +group.add_argument('--cache_type', type=str, default='fp16', help='KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.') + # DeepSpeed group = parser.add_argument_group('DeepSpeed') group.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.') @@ -191,6 +192,7 @@ group.add_argument('--ssl-keyfile', type=str, help='The path to the SSL certificate key file.', default=None) group.add_argument('--ssl-certfile', type=str, help='The path to the SSL certificate cert file.', default=None) group.add_argument('--subpath', type=str, help='Customize the subpath for gradio, use with reverse proxy') +group.add_argument('--old-colors', action='store_true', help='Use the legacy Gradio colors, before the December/2024 update.') # API group = parser.add_argument_group('API') @@ -213,6 +215,9 @@ group.add_argument('--checkpoint', type=str, help='DEPRECATED') group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED') +group.add_argument('--cache_4bit', action='store_true', help='DEPRECATED') +group.add_argument('--cache_8bit', action='store_true', help='DEPRECATED') +group.add_argument('--chat-buttons', action='store_true', help='DEPRECATED') args = parser.parse_args() args_defaults = parser.parse_args([]) @@ -269,6 +274,58 @@ def fix_loader_name(name): return 'TensorRT-LLM' +def transform_legacy_kv_cache_options(opts): + # Handle both argparse.Namespace and dict here + def get(key): + return opts.get(key) if isinstance(opts, dict) else getattr(opts, key, None) + + def set(key, value): + if isinstance(opts, dict): + opts[key] = value + else: + setattr(opts, key, value) + + def del_key(key, fallback_set): + # only remove from user dict, can't delete from argparse.Namespace + if type(opts) is dict: + if key in opts: + del opts[key] + else: + setattr(opts, key, fallback_set) + + # Retrieve values + loader = get('loader') + cache_8bit = get('cache_8bit') + cache_4bit = get('cache_4bit') + + # Determine cache type based on loader or legacy flags + if cache_8bit or cache_4bit: + if not loader: + # Legacy behavior: prefer 8-bit over 4-bit to minimize breakage + if cache_8bit: + set('cache_type', 'fp8') + elif cache_4bit: + set('cache_type', 'q4') + elif loader.lower() in ['exllamav2', 'exllamav2_hf']: + # ExLlamaV2 loader-specific cache type + if cache_8bit: + set('cache_type', 'fp8') + elif cache_4bit: + set('cache_type', 'q4') + elif loader.lower() in ['llama.cpp', 'llamacpp_hf']: + # Llama.cpp loader-specific cache type + if cache_4bit: + set('cache_type', 'q4_0') + elif cache_8bit: + set('cache_type', 'q8_0') + + # Clean up legacy keys + del_key('cache_4bit', False) + del_key('cache_8bit', False) + + return opts + + def add_extension(name, last=False): if args.extensions is None: args.extensions = [name] @@ -297,10 +354,14 @@ def load_user_config(): else: user_config = {} + for model_name in user_config: + user_config[model_name] = transform_legacy_kv_cache_options(user_config[model_name]) + return user_config args.loader = fix_loader_name(args.loader) +args = transform_legacy_kv_cache_options(args) # Activate the multimodal extension if args.multimodal_pipeline is not None: diff --git a/modules/ui.py b/modules/ui.py index c07beeb466..4bfea9fade 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -50,6 +50,50 @@ button_secondary_border_color="var(--border-color-primary)" ) +if not shared.args.old_colors: + theme = theme.set( + # General Colors + border_color_primary='#c5c5d2', + body_text_color_subdued='#484848', + background_fill_secondary='#eaeaea', + background_fill_secondary_dark='var(--selected-item-color-dark)', + background_fill_primary='var(--neutral-50)', + background_fill_primary_dark='var(--darker-gray)', + body_background_fill="white", + block_background_fill="transparent", + body_text_color="#333", + button_secondary_background_fill="#f4f4f4", + button_secondary_border_color="var(--border-color-primary)", + + # Dark Mode Colors + input_background_fill_dark='var(--darker-gray)', + checkbox_background_color_dark='var(--darker-gray)', + block_background_fill_dark='transparent', + block_border_color_dark='transparent', + input_border_color_dark='var(--border-color-dark)', + checkbox_border_color_dark='var(--border-color-dark)', + border_color_primary_dark='var(--border-color-dark)', + button_secondary_border_color_dark='var(--border-color-dark)', + body_background_fill_dark='var(--dark-gray)', + button_primary_background_fill_dark='transparent', + button_secondary_background_fill_dark='transparent', + checkbox_label_background_fill_dark='transparent', + button_cancel_background_fill_dark='transparent', + button_secondary_background_fill_hover_dark='var(--selected-item-color-dark)', + checkbox_label_background_fill_hover_dark='var(--selected-item-color-dark)', + table_even_background_fill_dark='var(--darker-gray)', + table_odd_background_fill_dark='var(--selected-item-color-dark)', + code_background_fill_dark='var(--darker-gray)', + + # Shadows and Radius + checkbox_label_shadow='none', + block_shadow='none', + block_shadow_dark='none', + button_large_radius='0.375rem', + button_large_padding='6px 12px', + input_radius='0.375rem', + ) + if Path("notification.mp3").exists(): audio_notification_js = "document.querySelector('#audio_notification audio')?.play();" else: @@ -87,8 +131,7 @@ def list_model_elements(): 'no_xformers', 'no_sdpa', 'num_experts_per_token', - 'cache_8bit', - 'cache_4bit', + 'cache_type', 'autosplit', 'enable_tp', 'threads', @@ -232,10 +275,10 @@ def gather_interface_values(*args): def apply_interface_values(state, use_persistent=False): if use_persistent: state = shared.persistent_interface_state - if 'textbox-default' in state: + if 'textbox-default' in state and 'prompt_menu-default' in state: state.pop('prompt_menu-default') - if 'textbox-notebook' in state: + if 'textbox-notebook' and 'prompt_menu-notebook' in state: state.pop('prompt_menu-notebook') elements = list_interface_input_elements() diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 57143cd8c0..e372f5c223 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -12,7 +12,6 @@ inputs = ('Chat input', 'interface_state') reload_arr = ('history', 'name1', 'name2', 'mode', 'chat_style', 'character_menu') -clear_arr = ('delete_chat-confirm', 'delete_chat', 'delete_chat-cancel') def create_ui(): @@ -21,7 +20,27 @@ def create_ui(): shared.gradio['Chat input'] = gr.State() shared.gradio['history'] = gr.JSON({'internal': [], 'visible': []}, visible=False) - with gr.Tab('Chat', elem_id='chat-tab', elem_classes=("old-ui" if shared.args.chat_buttons else None)): + with gr.Tab('Chat', elem_id='chat-tab'): + with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']): + with gr.Column(): + with gr.Row(elem_id='past-chats-buttons'): + shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu) + shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) + shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input']) + + with gr.Row(elem_id='delete-chat-row', visible=False) as shared.gradio['delete-chat-row']: + shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input']) + shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input']) + + with gr.Row(elem_id='rename-row', visible=False) as shared.gradio['rename-row']: + shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', elem_classes=['no-background']) + with gr.Row(): + shared.gradio['rename_to-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input']) + shared.gradio['rename_to-confirm'] = gr.Button('Confirm', elem_classes=['refresh-button', 'focus-on-chat-input'], variant='primary') + + with gr.Row(): + shared.gradio['unique_id'] = gr.Radio(label="", elem_classes=['slim-dropdown', 'pretty_scrollbar'], interactive=not mu, elem_id='past-chats') + with gr.Row(): with gr.Column(elem_id='chat-col'): shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', '')) @@ -60,25 +79,6 @@ def create_ui(): shared.gradio['send-chat-to-default'] = gr.Button('Send to default') shared.gradio['send-chat-to-notebook'] = gr.Button('Send to notebook') - with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']): - with gr.Column(): - with gr.Row(): - shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu) - shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) - shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input']) - shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input']) - shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input']) - - with gr.Row(elem_id='rename-row'): - shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', visible=False, elem_classes=['no-background']) - with gr.Row(): - shared.gradio['rename_to-confirm'] = gr.Button('Confirm', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input']) - shared.gradio['rename_to-cancel'] = gr.Button('Cancel', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input']) - - gr.Markdown("Past chats") - with gr.Row(): - shared.gradio['unique_id'] = gr.Radio(label="", elem_classes=['slim-dropdown', 'pretty_scrollbar'], interactive=not mu, elem_id='past-chats') - with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']): with gr.Column(): with gr.Row(): @@ -180,29 +180,39 @@ def create_event_handlers(): shared.gradio['Generate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( + lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then( chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( + None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( + lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then( chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( + None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Regenerate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then( partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( + None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Continue'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then( partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( + None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Impersonate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( + lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then( chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then( + None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Replace last reply'].click( @@ -234,21 +244,21 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False) - shared.gradio['delete_chat'].click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, gradio(clear_arr)) - shared.gradio['delete_chat-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr)) + shared.gradio['delete_chat'].click(lambda: gr.update(visible=True), None, gradio('delete-chat-row')) + shared.gradio['delete_chat-cancel'].click(lambda: gr.update(visible=False), None, gradio('delete-chat-row')) shared.gradio['delete_chat-confirm'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id') + gradio(clear_arr), show_progress=False) + chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id', 'delete-chat-row'), show_progress=False) - shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False) - shared.gradio['rename_to-cancel'].click(lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False) + shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename-row'), show_progress=False) + shared.gradio['rename_to-cancel'].click(lambda: gr.update(visible=False), None, gradio('rename-row'), show_progress=False) shared.gradio['rename_to-confirm'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False) + chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename-row')) shared.gradio['rename_to'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False) + chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename-row'), show_progress=False) shared.gradio['load_chat_history'].upload( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( diff --git a/modules/ui_default.py b/modules/ui_default.py index 112acd2358..ccae9a5ec3 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -20,12 +20,12 @@ def create_ui(): with gr.Column(): with gr.Row(): shared.gradio['textbox-default'] = gr.Textbox(value='', lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar']) - shared.gradio['token-counter-default'] = gr.HTML(value="0", elem_classes=["token-counter", "default-token-counter"]) + shared.gradio['token-counter-default'] = gr.HTML(value="0", elem_id="default-token-counter") with gr.Row(): - shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary') - shared.gradio['Stop-default'] = gr.Button('Stop', elem_id='stop') shared.gradio['Continue-default'] = gr.Button('Continue') + shared.gradio['Stop-default'] = gr.Button('Stop', elem_id='stop', visible=False) + shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary') with gr.Row(): shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') @@ -63,20 +63,26 @@ def create_ui(): def create_event_handlers(): shared.gradio['Generate-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-default', 'Generate-default')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-default', 'Generate-default')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-default'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-default', 'Generate-default')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-default', 'Generate-default')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Continue-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-default', 'Generate-default')).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-default', 'Generate-default')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index f87b680aeb..189bedfdf1 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -118,8 +118,7 @@ def create_ui(): shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.') shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices) shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.') - shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.') - shared.gradio['cache_4bit'] = gr.Checkbox(label="cache_4bit", value=shared.args.cache_4bit, info='Use Q4 cache to save VRAM.') + shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q6', 'q4'], value=shared.args.cache_type, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.') shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.') shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, precision=0, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.') shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.') @@ -195,13 +194,13 @@ def create_event_handlers(): shared.gradio['model_menu'].change( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( handle_load_model_event_initial, gradio('model_menu', 'interface_state'), gradio(ui.list_interface_input_elements()) + gradio('interface_state'), show_progress=False).then( - load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=False).success( + load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=True).success( handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False) shared.gradio['load_model'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( update_model_parameters, gradio('interface_state'), None).then( - partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success( + partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=True).success( handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False) shared.gradio['unload_model'].click(handle_unload_model_click, None, gradio('model_status'), show_progress=False) @@ -260,6 +259,8 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur yield ("Please enter a model path") return + repo_id = repo_id.strip() + specific_file = specific_file.strip() downloader = importlib.import_module("download-model").ModelDownloader() progress(0.0) @@ -297,7 +298,7 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur downloader.check_model_files(model, branch, links, sha256, output_folder) progress(1.0) else: - yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}`") + yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`") downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp) yield (f"Model successfully saved to `{output_folder}/`.") @@ -317,7 +318,7 @@ def create_llamacpp_hf(gguf_name, unquantized_url, progress=gr.Progress()): links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=True) output_folder = Path(shared.args.model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF") - yield (f"Downloading tokenizer to `{output_folder}`") + yield (f"Downloading tokenizer to `{output_folder}/`") downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=False) # Move the GGUF diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index 799328447c..b234ac5753 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -23,7 +23,7 @@ def create_ui(): with gr.Tab('Raw'): with gr.Row(): shared.gradio['textbox-notebook'] = gr.Textbox(value='', lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar']) - shared.gradio['token-counter-notebook'] = gr.HTML(value="0", elem_classes=["token-counter"]) + shared.gradio['token-counter-notebook'] = gr.HTML(value="0", elem_id="notebook-token-counter") with gr.Tab('Markdown'): shared.gradio['markdown_render-notebook'] = gr.Button('Render') @@ -48,10 +48,10 @@ def create_ui(): shared.gradio['tokens-notebook'] = gr.Textbox(lines=23, label='Tokens', elem_classes=['textbox_logits_notebook', 'add_scrollbar', 'monospace']) with gr.Row(): - shared.gradio['Generate-notebook'] = gr.Button('Generate', variant='primary', elem_classes='small-button') - shared.gradio['Stop-notebook'] = gr.Button('Stop', elem_classes='small-button', elem_id='stop') shared.gradio['Undo'] = gr.Button('Undo', elem_classes='small-button') shared.gradio['Regenerate-notebook'] = gr.Button('Regenerate', elem_classes='small-button') + shared.gradio['Stop-notebook'] = gr.Button('Stop', visible=False, elem_classes='small-button', elem_id='stop') + shared.gradio['Generate-notebook'] = gr.Button('Generate', variant='primary', elem_classes='small-button') with gr.Column(scale=1): gr.HTML('
') @@ -66,22 +66,28 @@ def create_event_handlers(): shared.gradio['Generate-notebook'].click( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-notebook', 'Generate-notebook')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-notebook', 'Generate-notebook')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-notebook'].submit( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-notebook', 'Generate-notebook')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-notebook', 'Generate-notebook')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Regenerate-notebook'].click( lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-notebook', 'Generate-notebook')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-notebook', 'Generate-notebook')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Undo'].click( diff --git a/requirements.txt b/requirements.txt index 2549c64864..24c92391ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ -accelerate==1.0.* -bitsandbytes==0.44.* +accelerate==1.2.* +bitsandbytes==0.45.* colorama datasets einops fastapi==0.112.4 -gradio==4.26.* +gradio==4.37.* jinja2==3.1.4 markdown numba==0.59.* @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.46.* +transformers==4.47.* tqdm wandb @@ -32,30 +32,30 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd.txt b/requirements_amd.txt index 78bdd3ca84..b7093d5099 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -1,9 +1,9 @@ -accelerate==1.0.* +accelerate==1.2.* colorama datasets einops fastapi==0.112.4 -gradio==4.26.* +gradio==4.37.* jinja2==3.1.4 markdown numba==0.59.* @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.46.* +transformers==4.47.* tqdm wandb @@ -31,14 +31,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.1+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.1+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.5+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.5+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 9420e861f8..88682aea04 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -1,9 +1,9 @@ -accelerate==1.0.* +accelerate==1.2.* colorama datasets einops fastapi==0.112.4 -gradio==4.26.* +gradio==4.37.* jinja2==3.1.4 markdown numba==0.59.* @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.46.* +transformers==4.47.* tqdm wandb @@ -31,12 +31,12 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 625021ee1f..6588278d03 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -1,9 +1,9 @@ -accelerate==1.0.* +accelerate==1.2.* colorama datasets einops fastapi==0.112.4 -gradio==4.26.* +gradio==4.37.* jinja2==3.1.4 markdown numba==0.59.* @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.46.* +transformers==4.47.* tqdm wandb @@ -31,8 +31,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 3cb66cbc81..1fc9795bb3 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -1,9 +1,9 @@ -accelerate==1.0.* +accelerate==1.2.* colorama datasets einops fastapi==0.112.4 -gradio==4.26.* +gradio==4.37.* jinja2==3.1.4 markdown numba==0.59.* @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.46.* +transformers==4.47.* tqdm wandb @@ -31,10 +31,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index fbd6447bdf..53fedd7ec5 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -1,9 +1,9 @@ -accelerate==1.0.* +accelerate==1.2.* colorama datasets einops fastapi==0.112.4 -gradio==4.26.* +gradio==4.37.* jinja2==3.1.4 markdown numba==0.59.* @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.46.* +transformers==4.47.* tqdm wandb @@ -31,7 +31,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index e9ab0fbad3..9f52b17283 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -1,9 +1,9 @@ -accelerate==1.0.* +accelerate==1.2.* colorama datasets einops fastapi==0.112.4 -gradio==4.26.* +gradio==4.37.* jinja2==3.1.4 markdown numba==0.59.* @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.46.* +transformers==4.47.* tqdm wandb @@ -31,7 +31,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 99791ea9b4..9ad138d8d4 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,10 +1,10 @@ -accelerate==1.0.* -bitsandbytes==0.44.* +accelerate==1.2.* +bitsandbytes==0.45.* colorama datasets einops fastapi==0.112.4 -gradio==4.26.* +gradio==4.37.* jinja2==3.1.4 markdown numba==0.59.* @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.46.* +transformers==4.47.* tqdm wandb @@ -32,30 +32,30 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index f5c3966eb3..e2daebd9ec 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -1,9 +1,9 @@ -accelerate==1.0.* +accelerate==1.2.* colorama datasets einops fastapi==0.112.4 -gradio==4.26.* +gradio==4.37.* jinja2==3.1.4 markdown numba==0.59.* @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.46.* +transformers==4.47.* tqdm wandb