Restructure code related to Wllama

felladrin · May 18, 2024 · 22580ae
1 parent d52f0c4
commit 22580ae
Show file tree

Hide file tree

Showing 2 changed files with 118 additions and 144 deletions.
diff --git a/client/modules/textGeneration.ts b/client/modules/textGeneration.ts
@@ -226,99 +226,7 @@ async function generateTextWithWebLlm() {
 }
 
 async function generateTextWithWllama() {
-  const { initializeWllama, runCompletion, exitWllama } = await import(
-    "./wllama"
-  );
-
-  const commonSamplingConfig: import("@wllama/wllama").SamplingConfig = {
-    temp: 0.35,
-    dynatemp_range: 0.25,
-    top_k: 0,
-    top_p: 1,
-    min_p: 0.05,
-    tfs_z: 0.95,
-    typical_p: 0.85,
-    penalty_freq: 0.5,
-    penalty_repeat: 1.176,
-    penalty_last_n: -1,
-    mirostat: 2,
-    mirostat_tau: 3.5,
-  };
-
-  const availableModels: {
-    [key in
-      | "mobileDefault"
-      | "mobileLarger"
-      | "desktopDefault"
-      | "desktopLarger"]: {
-      url: string | string[];
-      userPrefix: string;
-      assistantPrefix: string;
-      messageSuffix: string;
-      sampling: import("@wllama/wllama").SamplingConfig;
-    };
-  } = {
-    mobileDefault: {
-      url: Array.from(
-        { length: 7 },
-        (_, i) =>
-          `https://huggingface.co/Felladrin/gguf-sharded-Llama-160M-Chat-v1/resolve/main/Llama-160M-Chat-v1.Q8_0.shard-${(
-            i + 1
-          )
-            .toString()
-            .padStart(5, "0")}-of-00007.gguf`,
-      ),
-      userPrefix: "<|im_start|>user\n",
-      assistantPrefix: "<|im_start|>assistant\n",
-      messageSuffix: "<|im_end|>\n",
-      sampling: commonSamplingConfig,
-    },
-    mobileLarger: {
-      url: Array.from(
-        { length: 7 },
-        (_, i) =>
-          `https://huggingface.co/Felladrin/gguf-sharded-zephyr-220m-dpo-full/resolve/main/zephyr-220m-dpo-full.Q8_0.shard-${(
-            i + 1
-          )
-            .toString()
-            .padStart(5, "0")}-of-00007.gguf`,
-      ),
-      userPrefix: "<|user|>\n",
-      assistantPrefix: "<|assistant|>\n",
-      messageSuffix: "</s>\n",
-      sampling: commonSamplingConfig,
-    },
-    desktopDefault: {
-      url: Array.from(
-        { length: 7 },
-        (_, i) =>
-          `https://huggingface.co/Felladrin/gguf-sharded-stablelm-2-1_6b-chat/resolve/main/stablelm-2-1_6b-chat.Q8_0.shard-${(
-            i + 1
-          )
-            .toString()
-            .padStart(5, "0")}-of-00007.gguf`,
-      ),
-      userPrefix: "<|im_start|>user\n",
-      assistantPrefix: "<|im_start|>assistant\n",
-      messageSuffix: "<|im_end|>\n",
-      sampling: commonSamplingConfig,
-    },
-    desktopLarger: {
-      url: Array.from(
-        { length: 51 },
-        (_, i) =>
-          `https://huggingface.co/Felladrin/gguf-sharded-Phi-3-mini-4k-instruct-iMat/resolve/main/phi-3-mini-4k-instruct-imat-Q5_K_M.shard-${(
-            i + 1
-          )
-            .toString()
-            .padStart(5, "0")}-of-00051.gguf`,
-      ),
-      userPrefix: "<|user|>\n",
-      assistantPrefix: "<|assistant|>\n",
-      messageSuffix: "<|end|>\n",
-      sampling: commonSamplingConfig,
-    },
-  };
+  const { initializeWllama, availableModels } = await import("./wllama");
 
   const defaultModel = isRunningOnMobile
     ? availableModels.mobileDefault
@@ -332,9 +240,11 @@ async function generateTextWithWllama() {
 
   let loadingPercentage = 0;
 
-  await initializeWllama({
-    modelUrl: selectedModel.url,
-    modelConfig: {
+  const wllama = await initializeWllama(selectedModel.url, {
+    wllama: {
+      suppressNativeLog: !debug,
+    },
+    model: {
       n_ctx: 2 * 1024,
       n_threads: getNumberOfThreadsSetting(),
       cache_type_k: "q4_0",
@@ -386,8 +296,7 @@ async function generateTextWithWllama() {
 
     let isAnswering = false;
 
-    const completion = await runCompletion({
-      prompt,
+    const completion = await wllama.createCompletion(prompt, {
       nPredict: 768,
       sampling: selectedModel.sampling,
       onNewToken: (_token, _piece, currentText, { abortSignal }) => {
@@ -427,8 +336,7 @@ async function generateTextWithWllama() {
         ["Answer:", "This text is about"].join("\n"),
       ].join("");
 
-      const completion = await runCompletion({
-        prompt,
+      const completion = await wllama.createCompletion(prompt, {
         nPredict: 128,
         sampling: selectedModel.sampling,
         onNewToken: (_token, _piece, currentText, { abortSignal }) => {
@@ -450,7 +358,7 @@ async function generateTextWithWllama() {
     }
   }
 
-  await exitWllama();
+  await wllama.exit();
 }
 
 async function generateTextWithRatchet() {

diff --git a/client/modules/wllama.ts b/client/modules/wllama.ts
@@ -1,58 +1,124 @@
 import {
   DownloadModelConfig,
-  SamplingConfig,
   Wllama,
-  AssetsPathConfig,
+  WllamaConfig,
+  SamplingConfig,
 } from "@wllama/wllama";
 import singleThreadWllamaJsUrl from "@wllama/wllama/esm/single-thread/wllama.js?url";
 import singleThreadWllamaWasmUrl from "@wllama/wllama/esm/single-thread/wllama.wasm?url";
 import multiThreadWllamaJsUrl from "@wllama/wllama/esm/multi-thread/wllama.js?url";
 import multiThreadWllamaWasmUrl from "@wllama/wllama/esm/multi-thread/wllama.wasm?url";
 import multiThreadWllamaWorkerMjsUrl from "@wllama/wllama/esm/multi-thread/wllama.worker.mjs?url";
 
-let wllama: Wllama | undefined;
-
-export async function initializeWllama(config: {
-  modelUrl: string | string[];
-  modelConfig?: DownloadModelConfig;
-}) {
-  const wllamaConfigPaths: AssetsPathConfig = {
-    "single-thread/wllama.js": singleThreadWllamaJsUrl,
-    "single-thread/wllama.wasm": singleThreadWllamaWasmUrl,
-    "multi-thread/wllama.js": multiThreadWllamaJsUrl,
-    "multi-thread/wllama.wasm": multiThreadWllamaWasmUrl,
-    "multi-thread/wllama.worker.mjs": multiThreadWllamaWorkerMjsUrl,
-  };
-
-  wllama = new Wllama(wllamaConfigPaths);
-
-  return wllama.loadModelFromUrl(config.modelUrl, config.modelConfig ?? {});
-}
-
-export async function runCompletion(config: {
-  prompt: string;
-  nPredict?: number;
-  sampling?: SamplingConfig;
-  onNewToken: (
-    token: number,
-    piece: Uint8Array,
-    currentText: string,
-    optionals: {
-      abortSignal: () => void;
+export async function initializeWllama(
+  modelUrl: string | string[],
+  config?: {
+    wllama?: WllamaConfig;
+    model?: DownloadModelConfig;
+  },
+) {
+  const wllama = new Wllama(
+    {
+      "single-thread/wllama.js": singleThreadWllamaJsUrl,
+      "single-thread/wllama.wasm": singleThreadWllamaWasmUrl,
+      "multi-thread/wllama.js": multiThreadWllamaJsUrl,
+      "multi-thread/wllama.wasm": multiThreadWllamaWasmUrl,
+      "multi-thread/wllama.worker.mjs": multiThreadWllamaWorkerMjsUrl,
     },
-  ) => void;
-}) {
-  if (!wllama) throw new Error("Wllama is not initialized.");
+    config?.wllama,
+  );
+
+  await wllama.loadModelFromUrl(modelUrl, config?.model);
 
-  return wllama.createCompletion(config.prompt, {
-    nPredict: config.nPredict,
-    sampling: config.sampling,
-    onNewToken: config.onNewToken,
-  });
+  return wllama;
 }
 
-export async function exitWllama() {
-  if (wllama) await wllama.exit();
+const commonSamplingConfig: SamplingConfig = {
+  temp: 0.35,
+  dynatemp_range: 0.25,
+  top_k: 0,
+  top_p: 1,
+  min_p: 0.05,
+  tfs_z: 0.95,
+  typical_p: 0.85,
+  penalty_freq: 0.5,
+  penalty_repeat: 1.176,
+  penalty_last_n: -1,
+  mirostat: 2,
+  mirostat_tau: 3.5,
+};
 
-  wllama = undefined;
-}
+export const availableModels: {
+  [key in
+    | "mobileDefault"
+    | "mobileLarger"
+    | "desktopDefault"
+    | "desktopLarger"]: {
+    url: string | string[];
+    userPrefix: string;
+    assistantPrefix: string;
+    messageSuffix: string;
+    sampling: SamplingConfig;
+  };
+} = {
+  mobileDefault: {
+    url: Array.from(
+      { length: 7 },
+      (_, i) =>
+        `https://huggingface.co/Felladrin/gguf-sharded-Llama-160M-Chat-v1/resolve/main/Llama-160M-Chat-v1.Q8_0.shard-${(
+          i + 1
+        )
+          .toString()
+          .padStart(5, "0")}-of-00007.gguf`,
+    ),
+    userPrefix: "<|im_start|>user\n",
+    assistantPrefix: "<|im_start|>assistant\n",
+    messageSuffix: "<|im_end|>\n",
+    sampling: commonSamplingConfig,
+  },
+  mobileLarger: {
+    url: Array.from(
+      { length: 7 },
+      (_, i) =>
+        `https://huggingface.co/Felladrin/gguf-sharded-zephyr-220m-dpo-full/resolve/main/zephyr-220m-dpo-full.Q8_0.shard-${(
+          i + 1
+        )
+          .toString()
+          .padStart(5, "0")}-of-00007.gguf`,
+    ),
+    userPrefix: "<|user|>\n",
+    assistantPrefix: "<|assistant|>\n",
+    messageSuffix: "</s>\n",
+    sampling: commonSamplingConfig,
+  },
+  desktopDefault: {
+    url: Array.from(
+      { length: 7 },
+      (_, i) =>
+        `https://huggingface.co/Felladrin/gguf-sharded-stablelm-2-1_6b-chat/resolve/main/stablelm-2-1_6b-chat.Q8_0.shard-${(
+          i + 1
+        )
+          .toString()
+          .padStart(5, "0")}-of-00007.gguf`,
+    ),
+    userPrefix: "<|im_start|>user\n",
+    assistantPrefix: "<|im_start|>assistant\n",
+    messageSuffix: "<|im_end|>\n",
+    sampling: commonSamplingConfig,
+  },
+  desktopLarger: {
+    url: Array.from(
+      { length: 51 },
+      (_, i) =>
+        `https://huggingface.co/Felladrin/gguf-sharded-Phi-3-mini-4k-instruct-iMat/resolve/main/phi-3-mini-4k-instruct-imat-Q5_K_M.shard-${(
+          i + 1
+        )
+          .toString()
+          .padStart(5, "0")}-of-00051.gguf`,
+    ),
+    userPrefix: "<|user|>\n",
+    assistantPrefix: "<|assistant|>\n",
+    messageSuffix: "<|end|>\n",
+    sampling: commonSamplingConfig,
+  },
+};