diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index d6837cfdf726d..382501ac03d0e 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -950,20 +950,20 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in llamamodel->hparams.rope_freq_scale_train!=1.0f || llamamodel->hparams.rope_scaling_type_train==2) { - float ropemultiplier = 1.0f; - if(llamamodel->hparams.rope_scaling_type_train!=2 && - llamamodel->hparams.n_ctx_train > 2048 && clamped_max_context_length > llamamodel->hparams.n_ctx_train && - llamamodel->hparams.rope_freq_scale_train==1.0f) - { - ropemultiplier = (float)llamamodel->hparams.n_ctx_train / (float)clamped_max_context_length; - llama_ctx_params.rope_freq_base = rope_freq_base = llamamodel->hparams.rope_freq_base_train; - llama_ctx_params.rope_freq_scale = rope_freq_scale = ropemultiplier * llamamodel->hparams.rope_freq_scale_train; - printf("Automatic RoPE Scaling: Using (scale:%.3f, base:%.1f).\n", rope_freq_scale, rope_freq_base); - } - else - { + // float ropemultiplier = 1.0f; + // if(llamamodel->hparams.rope_scaling_type_train!=2 && + // llamamodel->hparams.n_ctx_train > 2048 && clamped_max_context_length > llamamodel->hparams.n_ctx_train && + // llamamodel->hparams.rope_freq_scale_train==1.0f) + // { + // ropemultiplier = (float)llamamodel->hparams.n_ctx_train / (float)clamped_max_context_length; + // llama_ctx_params.rope_freq_base = rope_freq_base = llamamodel->hparams.rope_freq_base_train; + // llama_ctx_params.rope_freq_scale = rope_freq_scale = ropemultiplier * llamamodel->hparams.rope_freq_scale_train; + // printf("Automatic RoPE Scaling: Using (scale:%.3f, base:%.1f).\n", rope_freq_scale, rope_freq_base); + // } + // else + // { printf("Automatic RoPE Scaling: Using model internal value.\n"); - } + // } } else { diff --git a/koboldcpp.py b/koboldcpp.py index 542040c17f7a3..7fcd720dd976f 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -630,52 +630,85 @@ async def handle_request(self, genparams, api_format, stream_flag): except Exception as e: print(e) - def noscript_webui(self, path): + def noscript_webui(self): global modelbusy + import html import urllib.parse as urlparse - parsed_url = urlparse.urlparse(path) + parsed_url = urlparse.urlparse(self.path) parsed_dict = urlparse.parse_qs(parsed_url.query) - status = "Error" reply = "" + status = parsed_dict['status'][0] if 'status' in parsed_dict else "Ready To Generate" prompt = parsed_dict['prompt'][0] if 'prompt' in parsed_dict else "" - max_length = parsed_dict['max_length'][0] if 'max_length' in parsed_dict else 100 - temperature = parsed_dict['temperature'][0] if 'temperature' in parsed_dict else 0.7 - top_k = parsed_dict['top_k'][0] if 'top_k' in parsed_dict else 100 - top_p = parsed_dict['top_p'][0] if 'top_p' in parsed_dict else 0.9 - rep_pen = parsed_dict['rep_pen'][0] if 'rep_pen' in parsed_dict else 1.1 + max_length = int(parsed_dict['max_length'][0]) if 'max_length' in parsed_dict else 100 + temperature = float(parsed_dict['temperature'][0]) if 'temperature' in parsed_dict else 0.7 + top_k = int(parsed_dict['top_k'][0]) if 'top_k' in parsed_dict else 100 + top_p = float(parsed_dict['top_p'][0]) if 'top_p' in parsed_dict else 0.9 + rep_pen = float(parsed_dict['rep_pen'][0]) if 'rep_pen' in parsed_dict else 1.1 + use_default_badwordsids = int(parsed_dict['use_default_badwordsids'][0]) if 'use_default_badwordsids' in parsed_dict else 0 gencommand = (parsed_dict['generate'][0] if 'generate' in parsed_dict else "")=="Generate" - if prompt=="" or not gencommand or max_length<=0: - status = "Ready To Generate" - elif modelbusy.locked(): - status = "Model is busy, try again later." + if gencommand: + if prompt=="" or max_length<=0: + status = "Need a valid prompt and length to generate." + if modelbusy.locked(): + status = "Model is currently busy, try again later." + else: + epurl = f"http://localhost:{args.port}" + if args.host!="": + epurl = f"http://{args.host}:{args.port}" + gen_payload = {"prompt": prompt,"max_length": max_length,"temperature": temperature,"prompt": prompt,"top_k": top_k,"top_p": top_p,"rep_pen": rep_pen,"use_default_badwordsids":use_default_badwordsids} + respjson = make_url_request(f'{epurl}/api/v1/generate', gen_payload) + reply = html.escape(respjson["results"][0]["text"]) + status = "Generation Completed" + + if "generate" in parsed_dict: + del parsed_dict["generate"] + parsed_dict["prompt"] = prompt + reply + parsed_dict["status"] = status + updated_query_string = urlparse.urlencode(parsed_dict, doseq=True) + updated_path = parsed_url._replace(query=updated_query_string).geturl() + self.path = updated_path + self.send_response(302) + self.send_header("location", self.path) + self.end_headers(content_type='text/html') + return else: - epurl = f"http://localhost:{args.port}" - if args.host!="": - epurl = f"http://{args.host}:{args.port}" - gen_payload = {"prompt": prompt,"max_length": max_length,"temperature": temperature,"prompt": prompt,"top_k": top_k,"top_p": top_p,"rep_pen": rep_pen} - respjson = make_url_request(f'{epurl}/api/v1/generate', gen_payload) - reply = respjson["results"][0]["text"] - status = "Generation Completed" - - finalhtml = f''' - -
KoboldCpp can be used without Javascript enabled, however this is not recommended.
-
If you have Javascript, please use Kobold Lite WebUI instead.
KoboldCpp can be used without Javascript enabled, however this is not recommended.
+
If you have Javascript, please use Kobold Lite WebUI instead.