Merge dev branch #4961

Merged · 8 commits · Dec 17, 2023
5 changes: 3 additions & 2 deletions README.md
@@ -15,7 +15,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
* Dropdown menu for quickly switching between different models.
* Large number of extensions (built-in and user-contributed), including Coqui TTS for realistic voice outputs, Whisper STT for voice inputs, translation, [multimodal pipelines](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal), vector databases, Stable Diffusion integration, and a lot more. See [the wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [the extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.
* [Chat with custom characters](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab#character).
- * Precise chat templates for instruction-following models, including Llama-2-chat, Alpaca, Vicuna, Mistral, and many others.
+ * Precise chat templates for instruction-following models, including Llama-2-chat, Alpaca, Vicuna, Mistral.
* LoRA: train new LoRAs with your own data, load/unload LoRAs on the fly for generation.
* Transformers library integration: load models in 4-bit or 8-bit precision through bitsandbytes, use llama.cpp with transformers samplers (`llamacpp_HF` loader), CPU inference in 32-bit precision using PyTorch.
* OpenAI-compatible API server with Chat and Completions endpoints -- see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples).
@@ -274,6 +274,7 @@ List of command-line flags
|`--cfg-cache` | ExLlama_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader, but not necessary for CFG with base ExLlama. |
|`--no_flash_attn` | Force flash-attention to not be used. |
|`--cache_8bit` | Use 8-bit cache to save VRAM. |
+ |`--num_experts_per_token NUM_EXPERTS_PER_TOKEN` | Number of experts to use for generation. Applies to MoE models like Mixtral. |

#### AutoGPTQ

@@ -377,7 +378,7 @@ text-generation-webui
└── llama-2-13b-chat.Q4_K_M.gguf
```

- * Other models (like 16-bit transformers models and GPTQ models) are made of several files and must be placed in a subfolder. Example:
+ * The remaining model types (like 16-bit transformers models and GPTQ models) are made of several files and must be placed in a subfolder. Example:

```
text-generation-webui
49 changes: 31 additions & 18 deletions css/html_instruct_style.css
@@ -1,10 +1,18 @@
.chat {
background: var(--block-background-fill);
padding: 24px 19px;
+ padding-right: 19px !important;
border: 1px solid var(--block-border-color);
border-radius: 8px;
}

.message {
display: grid;
grid-template-columns: 60px 1fr;
padding-bottom: 25px;
font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
- line-height: 22px;
+ line-height: 24px;
}

.username {
@@ -13,11 +21,16 @@

.message-body p, .message-body li {
font-size: 15px !important;
- line-height: 22.5px !important;
+ line-height: 24px !important;
+ list-style-position: outside;
}

.message-body p, .chat .message-body ul, .chat .message-body ol {
- margin-bottom: 23.4375px !important;
+ margin-bottom: 16px !important;
}

+ .chat .message-body ul, .chat .message-body ol {
+   padding-inline-start: 2em;
+ }

.message-body p:last-child, .chat .message-body ul:last-child, .chat .message-body ol:last-child {
@@ -34,34 +47,34 @@

.gradio-container .chat .assistant-message {
padding: 20px;
- border-radius: 20px;
- background-color: #0000000f;
- margin-top: 9px !important;
- margin-bottom: 18px !important;
+ background: var(--background-fill-secondary);
+ margin-top: 12px !important;
+ margin-bottom: 24px !important;
+ margin-right: 16px;
+ border-radius: 22px;
+ border-bottom-left-radius: 0;
+ border: 1px solid var(--border-color-primary);
}

.gradio-container .chat .user-message {
padding: 20px;
background-color: var(--color-accent-soft);
- border-radius: 20px;
- margin-bottom: 9px !important;
+ margin-bottom: 12px !important;
+ margin-left: 16px;
+ border-radius: 22px;
+ border-bottom-right-radius: 0;
+ border: 1px solid var(--border-color-accent-subdued);
}

.gradio-container .chat .assistant-message:last-child, .gradio-container .chat .user-message:last-child {
margin-bottom: 0 !important;
}

- .dark .chat .assistant-message {
-   background-color: #1f2937;
- }

- .dark .chat .user-message {
-   background-color: transparent;
- }

code {
- background-color: white !important;
+ background-color: #f3f4f6 !important;
}

.dark code {
- background-color: #0e1321 !important;
+ background-color: #1f2937 !important;
}
2 changes: 1 addition & 1 deletion css/main.css
@@ -332,7 +332,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
margin-left: auto;
margin-right: auto;
max-width: 880px;
- height: 100%;
+ min-height: var(--chat-height);
overflow-y: auto;
padding-right: 15px;
display: flex;
7 changes: 0 additions & 7 deletions grammars/japanese.gbnf

This file was deleted.

23 changes: 6 additions & 17 deletions grammars/json.gbnf
@@ -1,25 +1,14 @@
root ::= object
- value ::= object | array | string | number | ("true" | "false" | "null") ws

- object ::=
-   "{" ws (
-     string ":" ws value
-     ("," ws string ":" ws value)*
-   )? "}" ws
+ object ::= "{" ws ( string ":" ws value ("," ws string ":" ws value)* )? "}"

+ value ::= object | array | string | number | ("true" | "false" | "null") ws

- array ::=
-   "[" ws (
-     value
-     ("," ws value)*
-   )? "]" ws
+ array ::= "[" ws ( value ("," ws value)* )? "]" ws

- string ::=
-   "\"" (
-     [^"\\] |
-     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
-   )* "\"" ws
+ string ::= "\"" ( [a-zA-Z0-9] )* "\"" ws

number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

- # Optional space: by convention, applied in this grammar after literal chars when allowed

ws ::= ([ \t\n] ws)?
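The rewritten `string` rule is a sharp simplification: it now matches only runs of alphanumeric characters, dropping the old rule's escape sequences (`\"`, `\\`, `\uXXXX`, …) along with all other punctuation. A rough Python sketch (illustrative only, not part of the PR; the regexes mirror the two GBNF rules, with the trailing `ws` omitted) shows what the new grammar rejects:

```python
import re

# Regex translation of the old string rule: any non-quote/non-backslash
# characters, plus backslash escapes and \uXXXX sequences.
old_string = re.compile(r'"(?:[^"\\]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4}))*"')

# Regex translation of the new, simplified rule: alphanumerics only.
new_string = re.compile(r'"[a-zA-Z0-9]*"')

print(old_string.fullmatch('"say \\"hi\\""') is not None)  # True: escapes allowed
print(new_string.fullmatch('"say \\"hi\\""') is not None)  # False: no escapes
print(new_string.fullmatch('"hello123"') is not None)      # True
```

Note that the simplified rule rejects not only escapes but also spaces and punctuation inside strings — presumably a deliberate trade of expressiveness for a simpler grammar.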
34 changes: 0 additions & 34 deletions grammars/json_arr.gbnf

This file was deleted.

14 changes: 14 additions & 0 deletions grammars/json_w_trailing_space.gbnf
@@ -0,0 +1,14 @@
root ::= object

object ::= "{" ws ( string ":" ws value ("," ws string ":" ws value)* )? "}" ws

value ::= object | array | string | number | ("true" | "false" | "null") ws

array ::= "[" ws ( value ("," ws value)* )? "]" ws

string ::= "\"" ( [a-zA-Z0-9] )* "\"" ws

number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws


ws ::= ([ \t\n] ws)?
6 changes: 2 additions & 4 deletions grammars/list.gbnf
@@ -1,4 +1,2 @@
- root ::= item+
-
- # Excludes various line break characters
- item ::= "- " [^\r\n\x0b\x0c\x85\u2028\u2029]+ "\n"
+ root ::= "1. " paragraph "\n" ([0-9] [0-9]? ". " paragraph "\n")+
+ paragraph ::= [a-zA-Z'.,; ]+
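The new rules replace the old bulleted-list grammar with a numbered list of at least two items drawn from a restricted character set. A Python-regex rendering of the same shape (an illustrative sketch, not part of the PR):

```python
import re

# Mirror of the new list.gbnf: "1. " starts the list, and at least one
# further "N. " item must follow; paragraphs allow letters, apostrophes,
# and basic punctuation, matching the grammar's character class.
paragraph = r"[a-zA-Z'.,; ]+"
root = re.compile(rf"1\. {paragraph}\n(?:[0-9][0-9]?\. {paragraph}\n)+")

print(bool(root.fullmatch("1. First item\n2. Second item\n")))  # True
print(bool(root.fullmatch("1. Only one item\n")))               # False: two+ items
```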
7 changes: 7 additions & 0 deletions grammars/simple_arithmetic.gbnf
@@ -0,0 +1,7 @@
root ::= (expr "=" ws term "\n")+
expr ::= term ([-+*/] term)*
term ::= num | "(" ws expr ")" ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*
# this is a comment
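Each rule above maps one-to-one onto a small function, which makes the grammar easy to sanity-check. A minimal recursive-descent recognizer (an illustrative sketch, not part of the PR; it checks syntax only and does not verify that the arithmetic is correct):

```python
def parse(text):
    """Return True if `text` matches simple_arithmetic.gbnf's root rule."""
    def ws(i):                       # ws ::= [ \t\n]*
        while i < len(text) and text[i] in " \t\n":
            i += 1
        return i

    def num(i):                      # num ::= [0-9]+ ws
        start = i
        while i < len(text) and text[i].isdigit():
            i += 1
        if i == start:
            raise ValueError(f"expected digit at {i}")
        return ws(i)

    def term(i):                     # term ::= num | "(" ws expr ")" ws
        if text[i:i + 1] == "(":
            i = expr(ws(i + 1))
            if text[i:i + 1] != ")":
                raise ValueError(f"expected ')' at {i}")
            return ws(i + 1)
        return num(i)

    def expr(i):                     # expr ::= term ([-+*/] term)*
        i = term(i)
        while i < len(text) and text[i] in "-+*/":
            i = term(i + 1)
        return i

    # root ::= (expr "=" ws term "\n")+  -- here the ws consumed inside
    # num/term greedily swallows the "\n" terminator, so this sketch does
    # not match it separately.
    if not text:
        raise ValueError("expected at least one equation")
    i = 0
    while i < len(text):
        i = expr(i)
        if text[i:i + 1] != "=":
            raise ValueError(f"expected '=' at {i}")
        i = term(ws(i + 1))
    return True

print(parse("1+2=3\n(4*5)/2=10\n"))  # True
```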

64 changes: 14 additions & 50 deletions js/main.js
@@ -123,6 +123,8 @@ targetElement.addEventListener("scroll", function() {
// Create a MutationObserver instance
const observer = new MutationObserver(function(mutations) {
mutations.forEach(function(mutation) {
+ updateChatHeight();

if(!isScrolled) {
targetElement.scrollTop = targetElement.scrollHeight;
}
@@ -153,56 +155,6 @@ const config = {
// Start observing the target element
observer.observe(targetElement, config);

- //------------------------------------------------
- // Notebook box scrolling
- //------------------------------------------------
- const notebookElement = document.querySelector("#textbox-notebook textarea");
- let notebookScrolled = false;
-
- notebookElement.addEventListener("scroll", function() {
-   let diff = notebookElement.scrollHeight - notebookElement.clientHeight;
-   if(Math.abs(notebookElement.scrollTop - diff) <= 10 || diff == 0) {
-     notebookScrolled = false;
-   } else {
-     notebookScrolled = true;
-   }
- });
-
- const notebookObserver = new MutationObserver(function(mutations) {
-   mutations.forEach(function(mutation) {
-     if(!notebookScrolled) {
-       notebookElement.scrollTop = notebookElement.scrollHeight;
-     }
-   });
- });
-
- notebookObserver.observe(notebookElement.parentNode.parentNode.parentNode, config);
-
- //------------------------------------------------
- // Default box scrolling
- //------------------------------------------------
- const defaultElement = document.querySelector("#textbox-default textarea");
- let defaultScrolled = false;
-
- defaultElement.addEventListener("scroll", function() {
-   let diff = defaultElement.scrollHeight - defaultElement.clientHeight;
-   if(Math.abs(defaultElement.scrollTop - diff) <= 10 || diff == 0) {
-     defaultScrolled = false;
-   } else {
-     defaultScrolled = true;
-   }
- });
-
- const defaultObserver = new MutationObserver(function(mutations) {
-   mutations.forEach(function(mutation) {
-     if(!defaultScrolled) {
-       defaultElement.scrollTop = defaultElement.scrollHeight;
-     }
-   });
- });
-
- defaultObserver.observe(defaultElement.parentNode.parentNode.parentNode, config);
//------------------------------------------------
// Add some scrollbars
//------------------------------------------------
@@ -373,3 +325,15 @@ function toggleBigPicture() {
}
}

+ //------------------------------------------------
+ // Define the --chat-height global CSS variable to
+ // the height of the chat parent
+ //------------------------------------------------
+ function updateChatHeight() {
+   const chatContainer = document.getElementById('chat').parentNode.parentNode.parentNode;
+   const newChatHeight = `${chatContainer.clientHeight}px`;
+
+   document.documentElement.style.setProperty('--chat-height', newChatHeight);
+ }
+
+ window.addEventListener('resize', updateChatHeight);
7 changes: 3 additions & 4 deletions modules/chat.py
@@ -210,10 +210,6 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
output = copy.deepcopy(history)
output = apply_extensions('history', output)
state = apply_extensions('state', state)
- if shared.model_name == 'None' or shared.model is None:
-     logger.error("No model is loaded! Select one in the Model tab.")
-     yield output
-     return

visible_text = None
stopping_strings = get_stopping_strings(state)
@@ -252,6 +248,9 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
'internal': output['internal']
}

+ if shared.model_name == 'None' or shared.model is None:
+     raise ValueError("No model is loaded! Select one in the Model tab.")

# Generate the prompt
kwargs = {
'_continue': _continue,
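The moved check also changes the failure mode: the old code logged an error and yielded the unchanged history, ending the generator silently, while the new code raises so the caller sees the failure. A toy sketch of the difference (hypothetical function names, not the project's code):

```python
def wrapper_yield(model, history):
    # Old style: on a missing model, yield the input unchanged and stop --
    # to the user this looks like a silent no-op.
    if model is None:
        yield history
        return
    yield history + ["reply"]

def wrapper_raise(model, history):
    # New style: on a missing model, raise so the caller can report it
    # (Gradio, for instance, surfaces raised exceptions in the UI).
    if model is None:
        raise ValueError("No model is loaded! Select one in the Model tab.")
    yield history + ["reply"]

print(list(wrapper_yield(None, [])))  # [[]] -- history unchanged, no error
try:
    list(wrapper_raise(None, []))
except ValueError as err:
    print(err)  # No model is loaded! Select one in the Model tab.
```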
1 change: 1 addition & 0 deletions modules/exllamav2.py
@@ -48,6 +48,7 @@ def from_pretrained(self, path_to_model):
config.scale_pos_emb = shared.args.compress_pos_emb
config.scale_alpha_value = shared.args.alpha_value
config.no_flash_attn = shared.args.no_flash_attn
+ config.num_experts_per_token = int(shared.args.num_experts_per_token)

model = ExLlamaV2(config)

1 change: 1 addition & 0 deletions modules/exllamav2_hf.py
@@ -165,5 +165,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
config.scale_pos_emb = shared.args.compress_pos_emb
config.scale_alpha_value = shared.args.alpha_value
config.no_flash_attn = shared.args.no_flash_attn
+ config.num_experts_per_token = int(shared.args.num_experts_per_token)

return Exllamav2HF(config)
33 changes: 0 additions & 33 deletions modules/grammar.py

This file was deleted.
