From 07cb01bcf35d653307551c92360a214d19d29bea Mon Sep 17 00:00:00 2001 From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com> Date: Wed, 31 Jul 2024 20:07:33 -0400 Subject: [PATCH] [Model] Add gemma-2 2b and 9b (#520) Add gemma-2 2b and 9b to the prebuilt models; the old gemma models are kept as well. --- examples/simple-chat-ts/src/simple_chat.ts | 2 +- src/config.ts | 124 +++++++++++++++++---- 2 files changed, 104 insertions(+), 22 deletions(-) diff --git a/examples/simple-chat-ts/src/simple_chat.ts b/examples/simple-chat-ts/src/simple_chat.ts index 90ed2ee1..7c882a7a 100644 --- a/examples/simple-chat-ts/src/simple_chat.ts +++ b/examples/simple-chat-ts/src/simple_chat.ts @@ -342,7 +342,7 @@ let engine: webllm.MLCEngineInterface; if (useWebWorker) { engine = new webllm.WebWorkerMLCEngine( new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), - { appConfig }, + { appConfig, logLevel: "INFO" }, ); } else { engine = new webllm.MLCEngine({ appConfig }); diff --git a/src/config.ts b/src/config.ts index a4f87d61..9cd126be 100644 --- a/src/config.ts +++ b/src/config.ts @@ -708,65 +708,88 @@ export const prebuiltAppConfig: AppConfig = { context_window_size: 2048, }, }, - // Gemma-2B + // Gemma2 { - model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC", - model_id: "gemma-2b-it-q4f16_1-MLC", + model: "https://huggingface.co/mlc-ai/gemma-2-2b-it-q4f16_1-MLC", + model_id: "gemma-2-2b-it-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm", - vram_required_MB: 1476.52, + "/gemma-2-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1895.3, low_resource_required: false, - buffer_size_required_bytes: 262144000, required_features: ["shader-f16"], overrides: { context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC", - model_id: "gemma-2b-it-q4f32_1-MLC", + model: "https://huggingface.co/mlc-ai/gemma-2-2b-it-q4f32_1-MLC", + model_id: 
"gemma-2-2b-it-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/gemma-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm", - vram_required_MB: 1750.66, + "/gemma-2-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 2508.75, low_resource_required: false, - buffer_size_required_bytes: 262144000, overrides: { context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC", - model_id: "gemma-2b-it-q4f16_1-MLC-1k", + model: "https://huggingface.co/mlc-ai/gemma-2-2b-it-q4f16_1-MLC", + model_id: "gemma-2-2b-it-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + modelVersion + - "/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm", - vram_required_MB: 1476.52, + "/gemma-2-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1583.3, low_resource_required: true, - buffer_size_required_bytes: 262144000, required_features: ["shader-f16"], overrides: { context_window_size: 1024, }, }, { - model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC", - model_id: "gemma-2b-it-q4f32_1-MLC-1k", + model: "https://huggingface.co/mlc-ai/gemma-2-2b-it-q4f32_1-MLC", + model_id: "gemma-2-2b-it-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + modelVersion + - "/gemma-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm", - vram_required_MB: 1750.66, + "/gemma-2-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1884.75, low_resource_required: true, - buffer_size_required_bytes: 262144000, overrides: { context_window_size: 1024, }, }, + { + model: "https://huggingface.co/mlc-ai/gemma-2-9b-it-q4f16_1-MLC", + model_id: "gemma-2-9b-it-q4f16_1-MLC", + model_lib: + modelLibURLPrefix + + modelVersion + + "/gemma-2-9b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 6422.01, + low_resource_required: false, + required_features: ["shader-f16"], + overrides: { + context_window_size: 4096, + }, + }, + { + model: "https://huggingface.co/mlc-ai/gemma-2-9b-it-q4f32_1-MLC", + model_id: "gemma-2-9b-it-q4f32_1-MLC", + model_lib: + modelLibURLPrefix + + modelVersion 
+ + "/gemma-2-9b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 8383.33, + low_resource_required: false, + overrides: { + context_window_size: 4096, + }, + }, // Qwen-2 { model: "https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC", @@ -1180,6 +1203,65 @@ export const prebuiltAppConfig: AppConfig = { context_window_size: 4096, }, }, + // Gemma-2B + { + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC", + model_id: "gemma-2b-it-q4f16_1-MLC", + model_lib: + modelLibURLPrefix + + modelVersion + + "/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1476.52, + low_resource_required: false, + buffer_size_required_bytes: 262144000, + required_features: ["shader-f16"], + overrides: { + context_window_size: 4096, + }, + }, + { + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC", + model_id: "gemma-2b-it-q4f32_1-MLC", + model_lib: + modelLibURLPrefix + + modelVersion + + "/gemma-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1750.66, + low_resource_required: false, + buffer_size_required_bytes: 262144000, + overrides: { + context_window_size: 4096, + }, + }, + { + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC", + model_id: "gemma-2b-it-q4f16_1-MLC-1k", + model_lib: + modelLibURLPrefix + + modelVersion + + "/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1476.52, + low_resource_required: true, + buffer_size_required_bytes: 262144000, + required_features: ["shader-f16"], + overrides: { + context_window_size: 1024, + }, + }, + { + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC", + model_id: "gemma-2b-it-q4f32_1-MLC-1k", + model_lib: + modelLibURLPrefix + + modelVersion + + "/gemma-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1750.66, + low_resource_required: true, + buffer_size_required_bytes: 262144000, + overrides: { + context_window_size: 1024, + }, + }, // Phi-2 { model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC",