Merge pull request #6585 from oobabooga/dev
Merge dev branch
oobabooga authored Dec 19, 2024
Commit 4d466d5 · 2 parents: cc8c7ed + fee23df
Showing 34 changed files with 1,199 additions and 431 deletions.
README.md (31 changes: 17 additions & 14 deletions)
@@ -4,9 +4,9 @@ A Gradio web UI for Large Language Models.

Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation.

- |![Image1](https://github.com/oobabooga/screenshots/raw/main/print_instruct.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_chat.png) |
+ |![Image1](https://github.com/oobabooga/screenshots/raw/main/AFTER-INSTRUCT.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/AFTER-CHAT.png) |
|:---:|:---:|
- |![Image1](https://github.com/oobabooga/screenshots/raw/main/print_default.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_parameters.png) |
+ |![Image1](https://github.com/oobabooga/screenshots/raw/main/AFTER-DEFAULT.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/AFTER-PARAMETERS.png) |

## Features

@@ -202,18 +202,19 @@ List of command-line flags

```txt
usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]
- [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--chat-buttons] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices]
- [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code]
- [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE]
- [--flash-attn] [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock]
- [--n-gpu-layers N_GPU_LAYERS] [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm]
- [--attention-sink-size ATTENTION_SINK_SIZE] [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn]
- [--no_xformers] [--no_sdpa] [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act]
- [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR]
+ [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]]
+ [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast]
+ [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--tensorcores]
+ [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS]
+ [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] [--attention-sink-size ATTENTION_SINK_SIZE]
+ [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa]
+ [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--enable_tp] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] [--disable_exllamav2]
+ [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR]
[--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT]
[--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE]
- [--subpath SUBPATH] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui]
+ [--subpath SUBPATH] [--old-colors] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui]
[--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch] [--no_inject_fused_attention]
+ [--cache_4bit] [--cache_8bit] [--chat-buttons]
Text generation web UI
@@ -232,7 +233,6 @@ Basic settings:
file will be loaded by default without the need to use the --settings flag.
--extensions EXTENSIONS [EXTENSIONS ...] The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.
--verbose Print the prompts to the terminal.
- --chat-buttons Show buttons on the chat tab instead of a hover menu.
--idle-timeout IDLE_TIMEOUT Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.
Model loader:
@@ -291,9 +291,8 @@ ExLlamaV2:
--no_flash_attn Force flash-attention to not be used.
--no_xformers Force xformers to not be used.
--no_sdpa Force Torch SDPA to not be used.
- --cache_8bit Use 8-bit cache to save VRAM.
- --cache_4bit Use Q4 cache to save VRAM.
--num_experts_per_token NUM_EXPERTS_PER_TOKEN Number of experts to use for generation. Applies to MoE models like Mixtral.
+ --enable_tp Enable Tensor Parallelism (TP) in ExLlamaV2.
AutoGPTQ:
--triton Use triton.
@@ -311,6 +310,9 @@ HQQ:
TensorRT-LLM:
--cpp-runner Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet.
+ Cache:
+ --cache_type CACHE_TYPE KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.
DeepSpeed:
--deepspeed Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.
--nvme-offload-dir NVME_OFFLOAD_DIR DeepSpeed: Directory to use for ZeRO-3 NVME offloading.
@@ -332,6 +334,7 @@ Gradio:
--ssl-keyfile SSL_KEYFILE The path to the SSL certificate key file.
--ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file.
--subpath SUBPATH Customize the subpath for gradio, use with reverse proxy
+ --old-colors Use the legacy Gradio colors, before the December/2024 update.
API:
--api Enable the API extension.
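The hunks above introduce `--cache_type`, `--enable_tp`, and `--old-colors`, and drop the dedicated `--cache_8bit`, `--cache_4bit`, and `--chat-buttons` entries from their original groups. As a rough, illustrative sketch only (the model filename below is a placeholder, not something from this commit), a launch command exercising the new flags might look like:

```sh
# Illustrative sketch: "MyModel.gguf" is a placeholder for a model already present
# in the model directory (see --model-dir above). Valid --cache_type values depend
# on the loader (llama.cpp: fp16, q8_0, q4_0; ExLlamaV2: fp16, fp8, q8, q6, q4).
python server.py --model MyModel.gguf --cache_type q4_0

# Keep the pre-December/2024 Gradio colors and enable the API extension:
python server.py --old-colors --api
```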
Binary file added css/Inter/Inter-VariableFont_opsz,wght.ttf
Binary file not shown.
css/chat_style-cai-chat-square.css (2 changes: 1 addition & 1 deletion)
@@ -16,6 +16,6 @@
}

.message {
- padding-bottom: 30px;
+ padding-bottom: 2em;
grid-template-columns: 70px minmax(0, 1fr);
}
css/chat_style-cai-chat.css (2 changes: 1 addition & 1 deletion)
@@ -1,7 +1,7 @@
.message {
display: grid;
grid-template-columns: 60px minmax(0, 1fr);
- padding-bottom: 15px;
+ padding-bottom: 2em;
font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
line-height: 22.5px !important;
css/html_instruct_style.css (105 changes: 66 additions & 39 deletions)
@@ -1,74 +1,101 @@
.chat {
background: transparent;
- padding: 24px 19px;
- padding-right: 19px !important;
+ padding: 0;
+ padding-top: 0;
}

- .chat > .messages {
- padding-top: 18px !important;
+ .chat > .messages:first-child {
+ padding-top: 0 !important;
}

- .message {
- display: grid;
- grid-template-columns: 60px 1fr;
- padding-bottom: 25px;
- font-size: 15px;
- font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
- line-height: 24px;
+ .chat > .messages > :last-child {
+ margin-bottom: 1.7rem !important;
}

- .message:first-child {
- padding-top: 0;
+ .chat .message-body p, .chat .message-body li {
+ font-size: 1rem !important;
+ line-height: 28px !important;
}

- .username {
- display: none;
+ .dark .chat .message-body p,
+ .dark .chat .message-body li,
+ .dark .chat .message-body q {
+ color: #d1d5db !important;
}

- .message-body p, .message-body li {
- font-size: 15px !important;
- line-height: 24px !important;
+ .chat .message-body p,
+ .chat .message-body ul,
+ .chat .message-body ol {
+ margin-top: 1.25em !important;
+ margin-bottom: 1.25em !important;
}

- .message-body p, .chat .message-body ul, .chat .message-body ol {
- margin-bottom: 16px !important;
+ .chat .message-body p:first-child,
+ .chat .message-body ul:first-child,
+ .chat .message-body ol:first-child {
+ margin-top: 0 !important;
}

- .message-body p:last-child, .chat .message-body ul:last-child, .chat .message-body ol:last-child {
+ .chat .message-body p:last-child,
+ .chat .message-body ul:last-child,
+ .chat .message-body ol:last-child {
margin-bottom: 0 !important;
}

- .gradio-container .chat .assistant-message {
- padding: 20px;
+ .chat .message-body li {
+ margin-top: 1.25em !important;
+ margin-bottom: 1.25em !important;
}

+ .user-message, .assistant-message {
+ font-family: Inter, Helvetica, Arial, sans-serif;
+ }
+
+ .message:first-child {
+ padding-top: 0;
+ }
+
+ .username {
+ display: none;
+ }
+
+ .chat .user-message {
+ padding: 1.5rem 1rem;
+ border-radius: 0;
+ border-bottom-right-radius: 0;
+ }

.chat .assistant-message {
- background: #f4f4f4;
- margin-top: 9px !important;
- margin-bottom: 12px !important;
- border-radius: 7px;
- border: 1px solid var(--border-color-primary);
+ padding: 1.5rem 1rem;
+ border-radius: 0;
+ border: 0;
}

.dark .chat .user-message {
background: transparent;
}

.dark .chat .assistant-message {
- background: var(--color-grey-800);
+ background: var(--light-gray);
}

- .gradio-container .chat .user-message {
- padding: 20px;
- padding-left: 0;
- padding-right: 0;
- background-color: transparent;
- border-radius: 8px;
- border-bottom-right-radius: 0;
+ .chat .user-message .text,
+ .chat .assistant-message .text {
+ max-width: 40.25rem;
+ margin-left: auto;
+ margin-right: auto;
}

- .gradio-container .chat .assistant-message:last-child, .gradio-container .chat .user-message:last-child {
- margin-bottom: 0 !important;
+ /* Create space between two assistant messages in a row */
+ .assistant-message + .assistant-message {
+ margin-top: 1.5rem;
}

- code {
+ pre > code {
background-color: #f3f4f6 !important;
}

- .dark code {
+ .dark pre > code {
background-color: #1f2937 !important;
}