Commit
Restructure code related to Wllama
felladrin committed May 18, 2024
1 parent d52f0c4 commit 22580ae
Showing 2 changed files with 118 additions and 144 deletions.
client/modules/textGeneration.ts (110 changes: 9 additions & 101 deletions)
@@ -226,99 +226,7 @@ async function generateTextWithWebLlm() {
 }
 
 async function generateTextWithWllama() {
-  const { initializeWllama, runCompletion, exitWllama } = await import(
-    "./wllama"
-  );
-
-  const commonSamplingConfig: import("@wllama/wllama").SamplingConfig = {
-    temp: 0.35,
-    dynatemp_range: 0.25,
-    top_k: 0,
-    top_p: 1,
-    min_p: 0.05,
-    tfs_z: 0.95,
-    typical_p: 0.85,
-    penalty_freq: 0.5,
-    penalty_repeat: 1.176,
-    penalty_last_n: -1,
-    mirostat: 2,
-    mirostat_tau: 3.5,
-  };
-
-  const availableModels: {
-    [key in
-      | "mobileDefault"
-      | "mobileLarger"
-      | "desktopDefault"
-      | "desktopLarger"]: {
-      url: string | string[];
-      userPrefix: string;
-      assistantPrefix: string;
-      messageSuffix: string;
-      sampling: import("@wllama/wllama").SamplingConfig;
-    };
-  } = {
-    mobileDefault: {
-      url: Array.from(
-        { length: 7 },
-        (_, i) =>
-          `https://huggingface.co/Felladrin/gguf-sharded-Llama-160M-Chat-v1/resolve/main/Llama-160M-Chat-v1.Q8_0.shard-${(
-            i + 1
-          )
-            .toString()
-            .padStart(5, "0")}-of-00007.gguf`,
-      ),
-      userPrefix: "<|im_start|>user\n",
-      assistantPrefix: "<|im_start|>assistant\n",
-      messageSuffix: "<|im_end|>\n",
-      sampling: commonSamplingConfig,
-    },
-    mobileLarger: {
-      url: Array.from(
-        { length: 7 },
-        (_, i) =>
-          `https://huggingface.co/Felladrin/gguf-sharded-zephyr-220m-dpo-full/resolve/main/zephyr-220m-dpo-full.Q8_0.shard-${(
-            i + 1
-          )
-            .toString()
-            .padStart(5, "0")}-of-00007.gguf`,
-      ),
-      userPrefix: "<|user|>\n",
-      assistantPrefix: "<|assistant|>\n",
-      messageSuffix: "</s>\n",
-      sampling: commonSamplingConfig,
-    },
-    desktopDefault: {
-      url: Array.from(
-        { length: 7 },
-        (_, i) =>
-          `https://huggingface.co/Felladrin/gguf-sharded-stablelm-2-1_6b-chat/resolve/main/stablelm-2-1_6b-chat.Q8_0.shard-${(
-            i + 1
-          )
-            .toString()
-            .padStart(5, "0")}-of-00007.gguf`,
-      ),
-      userPrefix: "<|im_start|>user\n",
-      assistantPrefix: "<|im_start|>assistant\n",
-      messageSuffix: "<|im_end|>\n",
-      sampling: commonSamplingConfig,
-    },
-    desktopLarger: {
-      url: Array.from(
-        { length: 51 },
-        (_, i) =>
-          `https://huggingface.co/Felladrin/gguf-sharded-Phi-3-mini-4k-instruct-iMat/resolve/main/phi-3-mini-4k-instruct-imat-Q5_K_M.shard-${(
-            i + 1
-          )
-            .toString()
-            .padStart(5, "0")}-of-00051.gguf`,
-      ),
-      userPrefix: "<|user|>\n",
-      assistantPrefix: "<|assistant|>\n",
-      messageSuffix: "<|end|>\n",
-      sampling: commonSamplingConfig,
-    },
-  };
+  const { initializeWllama, availableModels } = await import("./wllama");
 
   const defaultModel = isRunningOnMobile
     ? availableModels.mobileDefault
@@ -332,9 +240,11 @@ async function generateTextWithWllama() {
 
   let loadingPercentage = 0;
 
-  await initializeWllama({
-    modelUrl: selectedModel.url,
-    modelConfig: {
+  const wllama = await initializeWllama(selectedModel.url, {
+    wllama: {
+      suppressNativeLog: !debug,
+    },
+    model: {
       n_ctx: 2 * 1024,
       n_threads: getNumberOfThreadsSetting(),
       cache_type_k: "q4_0",
@@ -386,8 +296,7 @@ async function generateTextWithWllama() {
 
   let isAnswering = false;
 
-  const completion = await runCompletion({
-    prompt,
+  const completion = await wllama.createCompletion(prompt, {
     nPredict: 768,
     sampling: selectedModel.sampling,
     onNewToken: (_token, _piece, currentText, { abortSignal }) => {
Expand Down Expand Up @@ -427,8 +336,7 @@ async function generateTextWithWllama() {
["Answer:", "This text is about"].join("\n"),
].join("");

const completion = await runCompletion({
prompt,
const completion = await wllama.createCompletion(prompt, {
nPredict: 128,
sampling: selectedModel.sampling,
onNewToken: (_token, _piece, currentText, { abortSignal }) => {
@@ -450,7 +358,7 @@ async function generateTextWithWllama() {
     }
   }
 
-  await exitWllama();
+  await wllama.exit();
 }
 
 async function generateTextWithRatchet() {
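
Net effect on this file: the model catalog and sampling defaults move out of textGeneration.ts, and the call sites collapse to a load → complete → exit sequence against the instance that initializeWllama now returns. A minimal sketch of the new flow, assuming only the diff above (the prompt string and model choice are illustrative placeholders, not code from the commit):

// Sketch only; prompt and model selection are placeholders.
const { initializeWllama, availableModels } = await import("./wllama");

const selectedModel = availableModels.desktopDefault;

// initializeWllama now returns the Wllama instance instead of caching it in module state.
const wllama = await initializeWllama(selectedModel.url, {
  model: { n_ctx: 2 * 1024 },
});

const completion = await wllama.createCompletion("Hello!", {
  nPredict: 768,
  sampling: selectedModel.sampling,
});

await wllama.exit();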
client/modules/wllama.ts (152 changes: 109 additions & 43 deletions)
@@ -1,58 +1,124 @@
 import {
   DownloadModelConfig,
-  SamplingConfig,
   Wllama,
-  AssetsPathConfig,
+  WllamaConfig,
+  SamplingConfig,
 } from "@wllama/wllama";
 import singleThreadWllamaJsUrl from "@wllama/wllama/esm/single-thread/wllama.js?url";
 import singleThreadWllamaWasmUrl from "@wllama/wllama/esm/single-thread/wllama.wasm?url";
 import multiThreadWllamaJsUrl from "@wllama/wllama/esm/multi-thread/wllama.js?url";
 import multiThreadWllamaWasmUrl from "@wllama/wllama/esm/multi-thread/wllama.wasm?url";
 import multiThreadWllamaWorkerMjsUrl from "@wllama/wllama/esm/multi-thread/wllama.worker.mjs?url";
 
-let wllama: Wllama | undefined;
-
-export async function initializeWllama(config: {
-  modelUrl: string | string[];
-  modelConfig?: DownloadModelConfig;
-}) {
-  const wllamaConfigPaths: AssetsPathConfig = {
-    "single-thread/wllama.js": singleThreadWllamaJsUrl,
-    "single-thread/wllama.wasm": singleThreadWllamaWasmUrl,
-    "multi-thread/wllama.js": multiThreadWllamaJsUrl,
-    "multi-thread/wllama.wasm": multiThreadWllamaWasmUrl,
-    "multi-thread/wllama.worker.mjs": multiThreadWllamaWorkerMjsUrl,
-  };
-
-  wllama = new Wllama(wllamaConfigPaths);
-
-  return wllama.loadModelFromUrl(config.modelUrl, config.modelConfig ?? {});
-}
-
-export async function runCompletion(config: {
-  prompt: string;
-  nPredict?: number;
-  sampling?: SamplingConfig;
-  onNewToken: (
-    token: number,
-    piece: Uint8Array,
-    currentText: string,
-    optionals: {
-      abortSignal: () => void;
-    },
-  ) => void;
-}) {
-  if (!wllama) throw new Error("Wllama is not initialized.");
-
-  return wllama.createCompletion(config.prompt, {
-    nPredict: config.nPredict,
-    sampling: config.sampling,
-    onNewToken: config.onNewToken,
-  });
-}
-
-export async function exitWllama() {
-  if (wllama) await wllama.exit();
-
-  wllama = undefined;
-}
+export async function initializeWllama(
+  modelUrl: string | string[],
+  config?: {
+    wllama?: WllamaConfig;
+    model?: DownloadModelConfig;
+  },
+) {
+  const wllama = new Wllama(
+    {
+      "single-thread/wllama.js": singleThreadWllamaJsUrl,
+      "single-thread/wllama.wasm": singleThreadWllamaWasmUrl,
+      "multi-thread/wllama.js": multiThreadWllamaJsUrl,
+      "multi-thread/wllama.wasm": multiThreadWllamaWasmUrl,
+      "multi-thread/wllama.worker.mjs": multiThreadWllamaWorkerMjsUrl,
+    },
+    config?.wllama,
+  );
+
+  await wllama.loadModelFromUrl(modelUrl, config?.model);
+
+  return wllama;
+}
+
+const commonSamplingConfig: SamplingConfig = {
+  temp: 0.35,
+  dynatemp_range: 0.25,
+  top_k: 0,
+  top_p: 1,
+  min_p: 0.05,
+  tfs_z: 0.95,
+  typical_p: 0.85,
+  penalty_freq: 0.5,
+  penalty_repeat: 1.176,
+  penalty_last_n: -1,
+  mirostat: 2,
+  mirostat_tau: 3.5,
+};
+
+export const availableModels: {
+  [key in
+    | "mobileDefault"
+    | "mobileLarger"
+    | "desktopDefault"
+    | "desktopLarger"]: {
+    url: string | string[];
+    userPrefix: string;
+    assistantPrefix: string;
+    messageSuffix: string;
+    sampling: SamplingConfig;
+  };
+} = {
+  mobileDefault: {
+    url: Array.from(
+      { length: 7 },
+      (_, i) =>
+        `https://huggingface.co/Felladrin/gguf-sharded-Llama-160M-Chat-v1/resolve/main/Llama-160M-Chat-v1.Q8_0.shard-${(
+          i + 1
+        )
+          .toString()
+          .padStart(5, "0")}-of-00007.gguf`,
+    ),
+    userPrefix: "<|im_start|>user\n",
+    assistantPrefix: "<|im_start|>assistant\n",
+    messageSuffix: "<|im_end|>\n",
+    sampling: commonSamplingConfig,
+  },
+  mobileLarger: {
+    url: Array.from(
+      { length: 7 },
+      (_, i) =>
+        `https://huggingface.co/Felladrin/gguf-sharded-zephyr-220m-dpo-full/resolve/main/zephyr-220m-dpo-full.Q8_0.shard-${(
+          i + 1
+        )
+          .toString()
+          .padStart(5, "0")}-of-00007.gguf`,
+    ),
+    userPrefix: "<|user|>\n",
+    assistantPrefix: "<|assistant|>\n",
+    messageSuffix: "</s>\n",
+    sampling: commonSamplingConfig,
+  },
+  desktopDefault: {
+    url: Array.from(
+      { length: 7 },
+      (_, i) =>
+        `https://huggingface.co/Felladrin/gguf-sharded-stablelm-2-1_6b-chat/resolve/main/stablelm-2-1_6b-chat.Q8_0.shard-${(
+          i + 1
+        )
+          .toString()
+          .padStart(5, "0")}-of-00007.gguf`,
+    ),
+    userPrefix: "<|im_start|>user\n",
+    assistantPrefix: "<|im_start|>assistant\n",
+    messageSuffix: "<|im_end|>\n",
+    sampling: commonSamplingConfig,
+  },
+  desktopLarger: {
+    url: Array.from(
+      { length: 51 },
+      (_, i) =>
+        `https://huggingface.co/Felladrin/gguf-sharded-Phi-3-mini-4k-instruct-iMat/resolve/main/phi-3-mini-4k-instruct-imat-Q5_K_M.shard-${(
+          i + 1
+        )
+          .toString()
+          .padStart(5, "0")}-of-00051.gguf`,
+    ),
+    userPrefix: "<|user|>\n",
+    assistantPrefix: "<|assistant|>\n",
+    messageSuffix: "<|end|>\n",
+    sampling: commonSamplingConfig,
+  },
+};
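
All four url arrays above follow the same sharded-GGUF naming convention: one-based shard indices zero-padded to five digits. A hypothetical helper (not part of this commit) capturing the pattern the entries repeat inline:

// Hypothetical refactoring sketch; this function does not exist in the commit.
function shardedModelUrls(baseUrl: string, shardCount: number): string[] {
  const total = shardCount.toString().padStart(5, "0");
  return Array.from(
    { length: shardCount },
    (_, i) =>
      `${baseUrl}.shard-${(i + 1).toString().padStart(5, "0")}-of-${total}.gguf`,
  );
}

// For example, the first mobileDefault shard:
// shardedModelUrls("https://huggingface.co/Felladrin/gguf-sharded-Llama-160M-Chat-v1/resolve/main/Llama-160M-Chat-v1.Q8_0", 7)[0]
// ends with "Llama-160M-Chat-v1.Q8_0.shard-00001-of-00007.gguf"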
