From cca4a934dd8cbe49ae2dda0d8eb9b85ad8e3760f Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 23 Jan 2025 11:49:40 +0800
Subject: [PATCH] fix for chat templates and drafting

---
 gpttype_adapter.cpp            | 16 ++++++++++++----
 kcpp_adapters/DeepSeek-V2.json |  8 ++++++++
 koboldcpp.py                   | 14 +++++++-------
 3 files changed, 27 insertions(+), 11 deletions(-)
 create mode 100644 kcpp_adapters/DeepSeek-V2.json

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 5d23138f54de1..03c1c4e50add7 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -601,10 +601,18 @@ static void speculative_decoding_setup(std::string spec_model_filename, const ll
             }
             else
             {
-                printf("Error: Draft model vocab of (%d) does not match base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab);
-                printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n");
-                llama_free(draft_ctx);
-                draft_ctx = nullptr;
+                int diff = abs(draftvocab-base_n_vocab);
+                if(diff <= 256)
+                {
+                    //vocab sizes differ by at most 256 tokens: keep the draft model loaded, but warn the user
+                    printf("WARNING: Draft model vocab of (%d) does not match base vocab of (%d).\nSpeculative decoding may malfunction!\n",draftvocab,base_n_vocab);
+                } else {
+                    printf("Error: Draft model vocab of (%d) is too different from base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab);
+                    printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n");
+                    llama_free(draft_ctx);
+                    draft_ctx = nullptr;
+                }
+
             }
         }
     }
diff --git a/kcpp_adapters/DeepSeek-V2.json b/kcpp_adapters/DeepSeek-V2.json
new file mode 100644
index 0000000000000..ded08b14d6ad1
--- /dev/null
+++ b/kcpp_adapters/DeepSeek-V2.json
@@ -0,0 +1,8 @@
+{
+  "system_start": "",
+  "system_end": "",
+  "user_start": "<｜User｜>",
+  "user_end": "",
+  "assistant_start": "<｜Assistant｜>",
+  "assistant_end": "<｜end▁of▁sentence｜>"
+}
diff --git a/koboldcpp.py b/koboldcpp.py
index 4e075e29190bc..927c8677860e2 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -59,7 +59,7 @@
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.82.3"
+KcppVersion = "1.82.4"
 showdebug = True
 guimode = False
 showsamplerwarning = True
@@ -3421,7 +3421,7 @@ def auto_set_backend_gui(manual_select=False):
     def on_picked_model_file(filepath):
         if filepath.lower().endswith('.kcpps') or filepath.lower().endswith('.kcppt'):
             #load it as a config file instead
-            with open(filepath, 'r') as f:
+            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                 dict = json.load(f)
                 import_vars(dict)
 
@@ -4014,7 +4014,7 @@ def export_vars():
         try:
             if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
                 print("Embedding chat completions adapter...")   # parse and save embedded preload story
-                with open(args.chatcompletionsadapter, 'r') as f:
+                with open(args.chatcompletionsadapter, 'r', encoding='utf-8', errors='ignore') as f:
                     args.chatcompletionsadapter = json.load(f)
         except Exception:
             pass
@@ -4025,7 +4025,7 @@ def export_vars():
         try:
             if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
                 print("Embedding preload story...")   # parse and save embedded preload story
-                with open(args.preloadstory, 'r') as f:
+                with open(args.preloadstory, 'r', encoding='utf-8', errors='ignore') as f:
                     args.preloadstory = json.load(f)
         except Exception:
             pass
@@ -4283,7 +4283,7 @@ def load_config_gui(): #this is used to populate the GUI with a config file, whe
         if not filename or filename=="":
             return
         runmode_untouched = False
-        with open(filename, 'r') as f:
+        with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
             dict = json.load(f)
             import_vars(dict)
         pass
@@ -4761,7 +4761,7 @@ def unload_libs():
 
 def load_config_cli(filename):
     print("Loading .kcpps configuration file...")
-    with open(filename, 'r') as f:
+    with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
         config = json.load(f)
         args.istemplate = False
         raw_args = (sys.argv[1:]) #a lousy hack to allow for overriding kcpps
@@ -4990,7 +4990,7 @@ def main(launch_args,start_server=True):
                 ccadapter_path = os.path.abspath(premade_adapt_path)
         if ccadapter_path:
             print(f"Loading Chat Completions Adapter: {ccadapter_path}")
-            with open(ccadapter_path, 'r') as f:
+            with open(ccadapter_path, 'r', encoding='utf-8', errors='replace') as f:
                 chatcompl_adapter = json.load(f)
                 canload = True
         else: