diff --git a/client/modules/textGeneration.ts b/client/modules/textGeneration.ts index 9aaa6b0f..ddefb086 100644 --- a/client/modules/textGeneration.ts +++ b/client/modules/textGeneration.ts @@ -226,99 +226,7 @@ async function generateTextWithWebLlm() { } async function generateTextWithWllama() { - const { initializeWllama, runCompletion, exitWllama } = await import( - "./wllama" - ); - - const commonSamplingConfig: import("@wllama/wllama").SamplingConfig = { - temp: 0.35, - dynatemp_range: 0.25, - top_k: 0, - top_p: 1, - min_p: 0.05, - tfs_z: 0.95, - typical_p: 0.85, - penalty_freq: 0.5, - penalty_repeat: 1.176, - penalty_last_n: -1, - mirostat: 2, - mirostat_tau: 3.5, - }; - - const availableModels: { - [key in - | "mobileDefault" - | "mobileLarger" - | "desktopDefault" - | "desktopLarger"]: { - url: string | string[]; - userPrefix: string; - assistantPrefix: string; - messageSuffix: string; - sampling: import("@wllama/wllama").SamplingConfig; - }; - } = { - mobileDefault: { - url: Array.from( - { length: 7 }, - (_, i) => - `https://huggingface.co/Felladrin/gguf-sharded-Llama-160M-Chat-v1/resolve/main/Llama-160M-Chat-v1.Q8_0.shard-${( - i + 1 - ) - .toString() - .padStart(5, "0")}-of-00007.gguf`, - ), - userPrefix: "<|im_start|>user\n", - assistantPrefix: "<|im_start|>assistant\n", - messageSuffix: "<|im_end|>\n", - sampling: commonSamplingConfig, - }, - mobileLarger: { - url: Array.from( - { length: 7 }, - (_, i) => - `https://huggingface.co/Felladrin/gguf-sharded-zephyr-220m-dpo-full/resolve/main/zephyr-220m-dpo-full.Q8_0.shard-${( - i + 1 - ) - .toString() - .padStart(5, "0")}-of-00007.gguf`, - ), - userPrefix: "<|user|>\n", - assistantPrefix: "<|assistant|>\n", - messageSuffix: "\n", - sampling: commonSamplingConfig, - }, - desktopDefault: { - url: Array.from( - { length: 7 }, - (_, i) => - `https://huggingface.co/Felladrin/gguf-sharded-stablelm-2-1_6b-chat/resolve/main/stablelm-2-1_6b-chat.Q8_0.shard-${( - i + 1 - ) - .toString() - .padStart(5, "0")}-of-00007.gguf`, - ), - userPrefix: "<|im_start|>user\n", - assistantPrefix: "<|im_start|>assistant\n", - messageSuffix: "<|im_end|>\n", - sampling: commonSamplingConfig, - }, - desktopLarger: { - url: Array.from( - { length: 51 }, - (_, i) => - `https://huggingface.co/Felladrin/gguf-sharded-Phi-3-mini-4k-instruct-iMat/resolve/main/phi-3-mini-4k-instruct-imat-Q5_K_M.shard-${( - i + 1 - ) - .toString() - .padStart(5, "0")}-of-00051.gguf`, - ), - userPrefix: "<|user|>\n", - assistantPrefix: "<|assistant|>\n", - messageSuffix: "<|end|>\n", - sampling: commonSamplingConfig, - }, - }; + const { initializeWllama, availableModels } = await import("./wllama"); const defaultModel = isRunningOnMobile ? availableModels.mobileDefault @@ -332,9 +240,11 @@ async function generateTextWithWllama() { let loadingPercentage = 0; - await initializeWllama({ - modelUrl: selectedModel.url, - modelConfig: { + const wllama = await initializeWllama(selectedModel.url, { + wllama: { + suppressNativeLog: !debug, + }, + model: { n_ctx: 2 * 1024, n_threads: getNumberOfThreadsSetting(), cache_type_k: "q4_0", @@ -386,8 +296,7 @@ async function generateTextWithWllama() { let isAnswering = false; - const completion = await runCompletion({ - prompt, + const completion = await wllama.createCompletion(prompt, { nPredict: 768, sampling: selectedModel.sampling, onNewToken: (_token, _piece, currentText, { abortSignal }) => { @@ -427,8 +336,7 @@ async function generateTextWithWllama() { ["Answer:", "This text is about"].join("\n"), ].join(""); - const completion = await runCompletion({ - prompt, + const completion = await wllama.createCompletion(prompt, { nPredict: 128, sampling: selectedModel.sampling, onNewToken: (_token, _piece, currentText, { abortSignal }) => { @@ -450,7 +358,7 @@ async function generateTextWithWllama() { } } - await exitWllama(); + await wllama.exit(); } async function generateTextWithRatchet() { diff --git a/client/modules/wllama.ts b/client/modules/wllama.ts index d181dd02..141a9be1 100644 --- a/client/modules/wllama.ts +++ b/client/modules/wllama.ts @@ -1,8 +1,8 @@ import { DownloadModelConfig, - SamplingConfig, Wllama, - AssetsPathConfig, + WllamaConfig, + SamplingConfig, } from "@wllama/wllama"; import singleThreadWllamaJsUrl from "@wllama/wllama/esm/single-thread/wllama.js?url"; import singleThreadWllamaWasmUrl from "@wllama/wllama/esm/single-thread/wllama.wasm?url"; @@ -10,49 +10,115 @@ import multiThreadWllamaJsUrl from "@wllama/wllama/esm/multi-thread/wllama.js?ur import multiThreadWllamaWasmUrl from "@wllama/wllama/esm/multi-thread/wllama.wasm?url"; import multiThreadWllamaWorkerMjsUrl from "@wllama/wllama/esm/multi-thread/wllama.worker.mjs?url"; -let wllama: Wllama | undefined; - -export async function initializeWllama(config: { - modelUrl: string | string[]; - modelConfig?: DownloadModelConfig; -}) { - const wllamaConfigPaths: AssetsPathConfig = { - "single-thread/wllama.js": singleThreadWllamaJsUrl, - "single-thread/wllama.wasm": singleThreadWllamaWasmUrl, - "multi-thread/wllama.js": multiThreadWllamaJsUrl, - "multi-thread/wllama.wasm": multiThreadWllamaWasmUrl, - "multi-thread/wllama.worker.mjs": multiThreadWllamaWorkerMjsUrl, - }; - - wllama = new Wllama(wllamaConfigPaths); - - return wllama.loadModelFromUrl(config.modelUrl, config.modelConfig ?? {}); -} - -export async function runCompletion(config: { - prompt: string; - nPredict?: number; - sampling?: SamplingConfig; - onNewToken: ( - token: number, - piece: Uint8Array, - currentText: string, - optionals: { - abortSignal: () => void; +export async function initializeWllama( + modelUrl: string | string[], + config?: { + wllama?: WllamaConfig; + model?: DownloadModelConfig; + }, +) { + const wllama = new Wllama( + { + "single-thread/wllama.js": singleThreadWllamaJsUrl, + "single-thread/wllama.wasm": singleThreadWllamaWasmUrl, + "multi-thread/wllama.js": multiThreadWllamaJsUrl, + "multi-thread/wllama.wasm": multiThreadWllamaWasmUrl, + "multi-thread/wllama.worker.mjs": multiThreadWllamaWorkerMjsUrl, }, - ) => void; -}) { - if (!wllama) throw new Error("Wllama is not initialized."); + config?.wllama, + ); + + await wllama.loadModelFromUrl(modelUrl, config?.model); - return wllama.createCompletion(config.prompt, { - nPredict: config.nPredict, - sampling: config.sampling, - onNewToken: config.onNewToken, - }); + return wllama; } -export async function exitWllama() { - if (wllama) await wllama.exit(); +const commonSamplingConfig: SamplingConfig = { + temp: 0.35, + dynatemp_range: 0.25, + top_k: 0, + top_p: 1, + min_p: 0.05, + tfs_z: 0.95, + typical_p: 0.85, + penalty_freq: 0.5, + penalty_repeat: 1.176, + penalty_last_n: -1, + mirostat: 2, + mirostat_tau: 3.5, +}; - wllama = undefined; -} +export const availableModels: { + [key in + | "mobileDefault" + | "mobileLarger" + | "desktopDefault" + | "desktopLarger"]: { + url: string | string[]; + userPrefix: string; + assistantPrefix: string; + messageSuffix: string; + sampling: SamplingConfig; + }; +} = { + mobileDefault: { + url: Array.from( + { length: 7 }, + (_, i) => + `https://huggingface.co/Felladrin/gguf-sharded-Llama-160M-Chat-v1/resolve/main/Llama-160M-Chat-v1.Q8_0.shard-${( + i + 1 + ) + .toString() + .padStart(5, "0")}-of-00007.gguf`, + ), + userPrefix: "<|im_start|>user\n", + assistantPrefix: "<|im_start|>assistant\n", + messageSuffix: "<|im_end|>\n", + sampling: commonSamplingConfig, + }, + mobileLarger: { + url: Array.from( + { length: 7 }, + (_, i) => + `https://huggingface.co/Felladrin/gguf-sharded-zephyr-220m-dpo-full/resolve/main/zephyr-220m-dpo-full.Q8_0.shard-${( + i + 1 + ) + .toString() + .padStart(5, "0")}-of-00007.gguf`, + ), + userPrefix: "<|user|>\n", + assistantPrefix: "<|assistant|>\n", + messageSuffix: "\n", + sampling: commonSamplingConfig, + }, + desktopDefault: { + url: Array.from( + { length: 7 }, + (_, i) => + `https://huggingface.co/Felladrin/gguf-sharded-stablelm-2-1_6b-chat/resolve/main/stablelm-2-1_6b-chat.Q8_0.shard-${( + i + 1 + ) + .toString() + .padStart(5, "0")}-of-00007.gguf`, + ), + userPrefix: "<|im_start|>user\n", + assistantPrefix: "<|im_start|>assistant\n", + messageSuffix: "<|im_end|>\n", + sampling: commonSamplingConfig, + }, + desktopLarger: { + url: Array.from( + { length: 51 }, + (_, i) => + `https://huggingface.co/Felladrin/gguf-sharded-Phi-3-mini-4k-instruct-iMat/resolve/main/phi-3-mini-4k-instruct-imat-Q5_K_M.shard-${( + i + 1 + ) + .toString() + .padStart(5, "0")}-of-00051.gguf`, + ), + userPrefix: "<|user|>\n", + assistantPrefix: "<|assistant|>\n", + messageSuffix: "<|end|>\n", + sampling: commonSamplingConfig, + }, +};