From cca4a934dd8cbe49ae2dda0d8eb9b85ad8e3760f Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Thu, 23 Jan 2025 11:49:40 +0800 Subject: [PATCH] fix for chat templates and drafting --- gpttype_adapter.cpp | 16 ++++++++++++---- kcpp_adapters/DeepSeek-V2.json | 8 ++++++++ koboldcpp.py | 14 +++++++------- 3 files changed, 27 insertions(+), 11 deletions(-) create mode 100644 kcpp_adapters/DeepSeek-V2.json diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 5d23138f54de1..03c1c4e50add7 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -601,10 +601,18 @@ static void speculative_decoding_setup(std::string spec_model_filename, const ll } else { - printf("Error: Draft model vocab of (%d) does not match base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab); - printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n"); - llama_free(draft_ctx); - draft_ctx = nullptr; + int diff = abs(draftvocab-base_n_vocab); + if(diff <= 256) + { + //allow small differences to work + printf("WARNING: Draft model vocab of (%d) does not match base vocab of (%d).\nSpeculative decoding may malfunction!\n",draftvocab,base_n_vocab); + } else { + printf("Error: Draft model vocab of (%d) is too different from base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab); + printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n"); + llama_free(draft_ctx); + draft_ctx = nullptr; + } + } } } diff --git a/kcpp_adapters/DeepSeek-V2.json b/kcpp_adapters/DeepSeek-V2.json new file mode 100644 index 0000000000000..ded08b14d6ad1 --- /dev/null +++ b/kcpp_adapters/DeepSeek-V2.json @@ -0,0 +1,8 @@ +{ + "system_start": "", + "system_end": "", + "user_start": "<|User|>", + "user_end": "", + "assistant_start": "<|Assistant|>", + "assistant_end": "<|end▁of▁sentence|>" +} diff --git a/koboldcpp.py b/koboldcpp.py index 4e075e29190bc..927c8677860e2 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -59,7 +59,7 @@ modelbusy = threading.Lock() requestsinqueue = 0 defaultport = 5001 -KcppVersion = "1.82.3" +KcppVersion = "1.82.4" showdebug = True guimode = False showsamplerwarning = True @@ -3421,7 +3421,7 @@ def auto_set_backend_gui(manual_select=False): def on_picked_model_file(filepath): if filepath.lower().endswith('.kcpps') or filepath.lower().endswith('.kcppt'): #load it as a config file instead - with open(filepath, 'r') as f: + with open(filepath, 'r', encoding='utf-8', errors='ignore') as f: dict = json.load(f) import_vars(dict) @@ -4014,7 +4014,7 @@ def export_vars(): try: if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter): print("Embedding chat completions adapter...") # parse and save embedded preload story - with open(args.chatcompletionsadapter, 'r') as f: + with open(args.chatcompletionsadapter, 'r', encoding='utf-8', errors='ignore') as f: args.chatcompletionsadapter = json.load(f) except Exception: pass @@ -4025,7 +4025,7 @@ def export_vars(): try: if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory): print("Embedding preload story...") # parse and save embedded preload story - with open(args.preloadstory, 'r') as f: + with open(args.preloadstory, 'r', encoding='utf-8', errors='ignore') as f: args.preloadstory = json.load(f) except Exception: pass @@ -4283,7 +4283,7 @@ def load_config_gui(): #this is used to populate the GUI with a config file, whe if not filename or filename=="": return runmode_untouched = False - with open(filename, 'r') as f: + with open(filename, 'r', encoding='utf-8', errors='ignore') as f: dict = json.load(f) import_vars(dict) pass @@ -4761,7 +4761,7 @@ def unload_libs(): def load_config_cli(filename): print("Loading .kcpps configuration file...") - with open(filename, 'r') as f: + with open(filename, 'r', encoding='utf-8', errors='ignore') as f: config = json.load(f) args.istemplate = False raw_args = (sys.argv[1:]) #a lousy hack to allow for overriding kcpps @@ -4990,7 +4990,7 @@ def main(launch_args,start_server=True): ccadapter_path = os.path.abspath(premade_adapt_path) if ccadapter_path: print(f"Loading Chat Completions Adapter: {ccadapter_path}") - with open(ccadapter_path, 'r') as f: + with open(ccadapter_path, 'r', encoding='utf-8', errors='replace') as f: chatcompl_adapter = json.load(f) canload = True else: