From fe45d7fb0320d4f17ae6808327d9b8076439a97a Mon Sep 17 00:00:00 2001
From: Ryan526
Date: Wed, 9 Oct 2024 10:58:48 -0400
Subject: [PATCH 1/3] Add o1 Models - use max_completion_tokens instead of max_tokens

---
 src/lib/ChatRequest.svelte              |  2 +-
 src/lib/ChatSettingsModal.svelte        |  2 +-
 src/lib/Profiles.svelte                 |  2 +-
 src/lib/Settings.svelte                 |  6 +++---
 src/lib/Types.svelte                    |  2 +-
 src/lib/providers/openai/models.svelte  | 16 +++++++++++++++-
 src/lib/providers/petals/request.svelte |  2 +-
 7 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/lib/ChatRequest.svelte b/src/lib/ChatRequest.svelte
index 888b5ca1..14bb64e9 100644
--- a/src/lib/ChatRequest.svelte
+++ b/src/lib/ChatRequest.svelte
@@ -191,7 +191,7 @@ export class ChatRequest {
         if (typeof setting.apiTransform === 'function') {
           value = setting.apiTransform(chatId, setting, value)
         }
-        if (key === 'max_tokens') {
+        if (key === 'max_completion_tokens') {
           if (opts.maxTokens) value = opts.maxTokens // only as large as requested
           if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
           if (value) value = Math.floor(value)
diff --git a/src/lib/ChatSettingsModal.svelte b/src/lib/ChatSettingsModal.svelte
index 8e822bbe..327c4e61 100644
--- a/src/lib/ChatSettingsModal.svelte
+++ b/src/lib/ChatSettingsModal.svelte
@@ -186,7 +186,7 @@
     const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect
     profileSelect.options = await getProfileSelect()
     chatDefaults.profile = await getDefaultProfileKey()
-    chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model)
+    chatDefaults.max_completion_tokens = getModelMaxTokens(chatSettings.model)
     // const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value
     defaultProfile = await getDefaultProfileKey()
     isDefault = defaultProfile === chatSettings.profile
diff --git a/src/lib/Profiles.svelte b/src/lib/Profiles.svelte
index 91c07a8e..b660b6c8 100644
--- a/src/lib/Profiles.svelte
+++ b/src/lib/Profiles.svelte
@@ -236,7 +236,7 @@ The user has walked in on [[CHARACTER_NAME]]. They are on the bridge of the Hear
     profileName: 'Jen - An uninhibited virtual friend [LLaMA]',
     temperature: 0.8,
     top_p: 0.5,
-    max_tokens: 4096,
+    max_completion_tokens: 4096,
     profileDescription: "*** NOT for OpenAI / ChatGPT models ***\n\nA profile attempting a coherent, unrestricted, playful and helpful chat with Stable Beluga 2 and other LLaMA instruct (non-chat) models running on Petals. The goal is to have an assistant that can talk to you about any topic and answer any question without lecturing you and continuously reminding you that it can't do things because it's only an AI language model. If you have a better profile, I'm sure @Niek wouldn't mind a pull request or issue opened.\n\nNote that chat with Llama 2 models under Petals can fall apart quickly, devolving into repetitive responses and catch-phrases. The repetitionPenalty setting helps with that, but it can also keep the model from repeating layout patterns you're prompting for, so it can be a delicate balance.\n\nThis profile uses:\n- A system prompt designed for character simulation\n- Modified delimiters, etc., to try to keep chat coherent\n- A summary prompt",
     continuousChat: 'summary',
     summaryThreshold: 0.8,
diff --git a/src/lib/Settings.svelte b/src/lib/Settings.svelte
index f46992d7..0e5b3ccc 100644
--- a/src/lib/Settings.svelte
+++ b/src/lib/Settings.svelte
@@ -81,7 +81,7 @@ const gptDefaults = {
   n: 1,
   stream: true,
   stop: null,
-  max_tokens: 512,
+  max_completion_tokens: 512,
   presence_penalty: 0,
   frequency_penalty: 0,
   logit_bias: null,
@@ -496,11 +496,11 @@ const chatSettingsList: ChatSetting[] = [
     hide: hideModelSetting
   },
   {
-    key: 'max_tokens',
+    key: 'max_completion_tokens',
     name: 'Max Tokens',
     title: 'The maximum number of tokens to generate in the completion.\n' +
       '\n' +
-      'The token count of your prompt plus max_tokens cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).\n',
+      'The token count of your prompt plus max_completion_tokens cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).\n',
     min: 1,
     max: 32768,
     step: 1,
diff --git a/src/lib/Types.svelte b/src/lib/Types.svelte
index 0454bd15..16561684 100644
--- a/src/lib/Types.svelte
+++ b/src/lib/Types.svelte
@@ -52,7 +52,7 @@ export type Request = {
   n?: number;
   stream?: boolean;
   stop?: string | null;
-  max_tokens?: number;
+  max_completion_tokens?: number;
   presence_penalty?: number;
   frequency_penalty?: number;
   logit_bias?: Record<string, number> | null;
diff --git a/src/lib/providers/openai/models.svelte b/src/lib/providers/openai/models.svelte
index 8affc3ae..66fb6cdc 100644
--- a/src/lib/providers/openai/models.svelte
+++ b/src/lib/providers/openai/models.svelte
@@ -87,6 +87,18 @@ const gpt4omini = {
   completion: 0.00000060, // $0.00060 per 1000 tokens completion
   max: 131072 // 128k max token buffer
 }
+const o1preview = {
+  ...chatModelBase,
+  prompt: 0.000015, // $0.015 per 1000 tokens prompt
+  completion: 0.00006, // $0.06 per 1000 tokens completion
+  max: 131072 // 128k max token buffer
+}
+const o1mini = {
+  ...chatModelBase,
+  prompt: 0.000003, // $0.003 per 1000 tokens prompt
+  completion: 0.000012, // $0.012 per 1000 tokens completion
+  max: 131072 // 128k max token buffer
+}
 const gpt432k = {
   ...chatModelBase,
   prompt: 0.00006, // $0.06 per 1000 tokens prompt
@@ -129,7 +141,9 @@ export const chatModels : Record<string, ModelDetail> = {
   'gpt-4-0125-preview': { ...gpt4128kpreview },
   'gpt-4-32k': { ...gpt432k },
   'gpt-4-32k-0314': { ...gpt432k },
-  'gpt-4-32k-0613': { ...gpt432k }
+  'gpt-4-32k-0613': { ...gpt432k },
+  'o1-preview': { ...o1preview },
+  'o1-mini': { ...o1mini }
 }
 
 export const fetchRemoteModels = async () => {
diff --git a/src/lib/providers/petals/request.svelte b/src/lib/providers/petals/request.svelte
index 8d5b8e50..40dfdc67 100644
--- a/src/lib/providers/petals/request.svelte
+++ b/src/lib/providers/petals/request.svelte
@@ -165,7 +165,7 @@ export const chatRequest = async (
       !chatSettings.holdSocket && ws.close()
     })
 
-    let maxLen = Math.min(opts.maxTokens || chatSettings.max_tokens || maxTokens, maxTokens)
+    let maxLen = Math.min(opts.maxTokens || chatSettings.max_completion_tokens || maxTokens, maxTokens)
 
     let midDel = ''
     for (let i = 0, l = delimiter.length; i < l; i++) {
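Why the rename matters: OpenAI's o1-family models reject the legacy `max_tokens` parameter outright, while `max_completion_tokens` is accepted as the generation cap for both the o1 and gpt-4o families, so a single key can serve every chat model the app supports. Below is a minimal sketch of the resulting request shape; the `buildBody` helper and its types are illustrative stand-ins, not code from this PR.

```typescript
// Illustrative only: shows the request-body field this series standardizes on.
type ChatMessage = { role: 'system' | 'user' | 'assistant', content: string }

type ChatCompletionBody = {
  model: string
  messages: ChatMessage[]
  // Caps generated tokens; on o1 models this also counts hidden reasoning tokens
  max_completion_tokens?: number
}

function buildBody (model: string, messages: ChatMessage[], maxTokens?: number): ChatCompletionBody {
  const body: ChatCompletionBody = { model, messages }
  // Mirrors the guard in ChatRequest.svelte: only send a positive whole number
  if (maxTokens && maxTokens >= 1) body.max_completion_tokens = Math.floor(maxTokens)
  return body
}
```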
From 5db2e456b61a2166e1635d0e03f5d1fb8facb5cd Mon Sep 17 00:00:00 2001
From: Ryan526
Date: Wed, 16 Oct 2024 20:08:16 -0400
Subject: [PATCH 2/3] Remove max tokens from suggestName

---
 src/lib/Chat.svelte | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/lib/Chat.svelte b/src/lib/Chat.svelte
index 2c90ed86..59601955 100644
--- a/src/lib/Chat.svelte
+++ b/src/lib/Chat.svelte
@@ -305,7 +305,6 @@
       autoAddMessages: false,
       streaming: false,
       summaryRequest: true,
-      maxTokens: 30
     })
 
     try {

From 9a2565a7d3e0450483c24d76fe039921a211828f Mon Sep 17 00:00:00 2001
From: Ryan526
Date: Wed, 16 Oct 2024 20:10:29 -0400
Subject: [PATCH 3/3] Remove trailing comma

---
 src/lib/Chat.svelte | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/Chat.svelte b/src/lib/Chat.svelte
index 59601955..db711425 100644
--- a/src/lib/Chat.svelte
+++ b/src/lib/Chat.svelte
@@ -304,7 +304,7 @@
       chat,
       autoAddMessages: false,
       streaming: false,
-      summaryRequest: true,
+      summaryRequest: true
     })
 
     try {
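With all three patches applied, `max_completion_tokens` is the only generation cap the app sends, and the name-suggestion call no longer caps output at 30 tokens; that cap is plausibly harmful on o1-style models, where the limit also counts hidden reasoning tokens, so a 30-token budget can leave no room for the visible answer. To put the pricing constants from PATCH 1/3 in concrete terms, here is a small illustrative calculation; the `rates` table and `estimateCost` helper are assumptions for the example, with the per-token figures copied from the patch.

```typescript
// Illustrative only: per-token rates copied from the o1 model entries above.
const rates = {
  'o1-preview': { prompt: 0.000015, completion: 0.00006 },
  'o1-mini': { prompt: 0.000003, completion: 0.000012 }
}

// Dollar cost of one request, given token counts as reported in the
// API response's `usage` object (prompt_tokens / completion_tokens).
function estimateCost (model: keyof typeof rates, promptTokens: number, completionTokens: number): number {
  const r = rates[model]
  return promptTokens * r.prompt + completionTokens * r.completion
}

// e.g. 1,000 prompt tokens + 2,000 completion tokens on o1-preview:
// 1000 * 0.000015 + 2000 * 0.00006 = 0.015 + 0.12 = $0.135
console.log(estimateCost('o1-preview', 1000, 2000).toFixed(3)) // "0.135"
```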