feat(model downloader): use HF_TOKEN when needed (#276)
* feat(model downloader): use `HF_TOKEN` when needed
* fix: update model recommendations
giladgd authored Jul 30, 2024
1 parent e3e0994 commit 826334b
Showing 7 changed files with 247 additions and 51 deletions.
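The headline change: the model downloader now attaches a Hugging Face access token when a download actually requires one (gated or private repositories), taken from the `HF_TOKEN` environment variable. The sketch below is not the PR's implementation (the actual download work is delegated to `ipull`, bumped in `package.json` further down); it only illustrates the on-demand token logic, and `downloadModelFile` is a hypothetical helper.

```typescript
import {writeFile} from "node:fs/promises";

// Hypothetical sketch of "use HF_TOKEN when needed": probe anonymously first,
// and only attach a token if the endpoint rejects unauthenticated access.
async function downloadModelFile(url: string, destination: string): Promise<void> {
    // Probe the URL anonymously; most model repositories are public.
    const probe = await fetch(url, {method: "HEAD", redirect: "follow"});

    const headers: Record<string, string> = {};
    if (probe.status === 401 || probe.status === 403) {
        // Only reach for a token when the repository actually requires authentication.
        const token = process.env.HF_TOKEN ?? process.env.HUGGING_FACE_HUB_TOKEN;
        if (token == null)
            throw new Error(`Authentication is required for ${url}, but no HF_TOKEN is set`);

        // Gated/private Hugging Face repositories accept a standard Bearer token.
        headers.Authorization = `Bearer ${token}`;
    }

    const response = await fetch(url, {headers, redirect: "follow"});
    if (!response.ok)
        throw new Error(`Failed to download ${url}: HTTP ${response.status}`);

    // For a real model file you would stream to disk; buffering keeps the sketch short.
    await writeFile(destination, Buffer.from(await response.arrayBuffer()));
}
```

Probing anonymously first keeps public downloads working with no configuration, while gated models fail fast with a clear message when no token is available.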
1 change: 0 additions & 1 deletion llama/addon/addon.cpp
@@ -57,7 +57,6 @@ Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
 consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
 consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
 consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
-consts.Set("llamaMaxRngState", Napi::Number::New(info.Env(), LLAMA_MAX_RNG_STATE));
 consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
 consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));

173 changes: 160 additions & 13 deletions package-lock.json

Some generated files are not rendered by default.

6 changes: 3 additions & 3 deletions package.json
@@ -165,7 +165,7 @@
 "filenamify": "^6.0.0",
 "fs-extra": "^11.2.0",
 "ignore": "^5.3.1",
-"ipull": "^3.3.0",
+"ipull": "^3.6.0",
 "is-unicode-supported": "^2.0.0",
 "lifecycle-utils": "^1.4.1",
 "log-symbols": "^6.0.0",
@@ -193,7 +193,7 @@
 }
 },
 "optionalDependencies": {
-"@node-llama-cpp/win-x64-cuda": "0.1.0",
-"@node-llama-cpp/linux-x64-cuda": "0.1.0"
+"@node-llama-cpp/linux-x64-cuda": "0.1.0",
+"@node-llama-cpp/win-x64-cuda": "0.1.0"
 }
 }
46 changes: 20 additions & 26 deletions src/cli/recommendedModels.ts
@@ -8,27 +8,21 @@ export const recommendedModels: ModelRecommendation[] = [{

 fileOptions: [{
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-8B-Instruct-Q8_0.gguf"
+file: "Meta-Llama-3.1-8B-Instruct.Q8_0.gguf"
 }
 }, {
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-8B-Instruct-Q6_K_L.gguf"
+file: "Meta-Llama-3.1-8B-Instruct.Q6_K.gguf"
 }
 }, {
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-8B-Instruct-Q5_K_L.gguf"
-}
-}, {
-huggingFace: {
-model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
-branch: "main",
-file: "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
+file: "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf"
 }
 }]
 }, {
@@ -40,33 +34,27 @@ export const recommendedModels: ModelRecommendation[] = [{

 fileOptions: [{
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-70B-Instruct-Q8_0/Meta-Llama-3.1-70B-Instruct-Q8_0-00001-of-00002.gguf"
+file: "Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2"
 }
 }, {
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-70B-Instruct-Q6_K_L/Meta-Llama-3.1-70B-Instruct-Q6_K_L-00001-of-00002.gguf"
+file: "Meta-Llama-3.1-70B-Instruct.Q6_K.gguf.part1of2"
 }
 }, {
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf"
+file: "Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf"
 }
 }, {
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf"
-}
-}, {
-huggingFace: {
-model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF",
-branch: "main",
-file: "Meta-Llama-3.1-70B-Instruct-IQ4_XS.gguf"
+file: "Meta-Llama-3.1-70B-Instruct.Q4_K_S.gguf"
 }
 }]
 }, {
@@ -82,6 +70,12 @@ export const recommendedModels: ModelRecommendation[] = [{
 branch: "main",
 file: "Meta-Llama-3.1-405B-Instruct.Q3_K_L.gguf.part1of5"
 }
+}, {
+huggingFace: {
+model: "mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF",
+branch: "main",
+file: "Meta-Llama-3.1-405B-Instruct.Q3_K_M.gguf.part1of4"
+}
 }]
 }, {
 name: "Phi 3 3.8B",
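The recommendations move from bartowski's quantizations to mradermacher's, whose larger models are published as concatenable split files (`.gguf.part1of2`, `.part1of5`) rather than llama.cpp-style `-00001-of-00002.gguf` shards. As a rough illustration of how such an entry maps to downloadable URLs, here is a small sketch: the `HuggingFaceFileOption` shape is simplified from the real `ModelRecommendation` type and the helper names are mine, while the `resolve/` URL pattern is Hugging Face's standard direct-download endpoint.

```typescript
// Simplified shape of a file option (the real ModelRecommendation type in
// src/cli/recommendedModels.ts carries more metadata).
interface HuggingFaceFileOption {
    model: string;   // e.g. "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF"
    branch: string;  // e.g. "main"
    file: string;    // e.g. "Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2"
}

// Build the direct-download URL for a file option via Hugging Face's "resolve" endpoint.
function resolveHuggingFaceUrl({model, branch, file}: HuggingFaceFileOption): string {
    return `https://huggingface.co/${model}/resolve/${encodeURI(branch)}/${encodeURI(file)}`;
}

// Expand a ".partXofY" file name into all of its sibling parts, so a downloader
// could fetch them all and join them into a single .gguf file.
function listSplitParts(file: string): string[] {
    const match = file.match(/^(?<base>.+\.gguf)\.part(?<part>\d+)of(?<total>\d+)$/);
    if (match?.groups == null)
        return [file]; // not a split file

    const base = match.groups.base;
    const total = Number(match.groups.total);
    return Array.from({length: total}, (_, i) => `${base}.part${i + 1}of${total}`);
}

// Example:
// listSplitParts("Meta-Llama-3.1-405B-Instruct.Q3_K_L.gguf.part1of5")
// returns the five file names ".part1of5" through ".part5of5".
```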
4 changes: 2 additions & 2 deletions src/gguf/insights/GgufInsights.ts
@@ -145,9 +145,9 @@ export class GgufInsights {
 const uint32TBytes = 4; // sizeof(uint32_t)
 const int32TBytes = 4; // sizeof(int32_t)

-// source: `llama_get_state_size` in `llama.cpp`
+// source: `llama_state_get_size` in `llama.cpp`
 const sRngSize = sizeTBytes;
-const sRng = this._llama._consts.llamaMaxRngState;
+const sRng = 64 * 1024; // LLAMA_MAX_RNG_STATE
 const sNOutputs = sizeTBytes;
 const sNOutputPos = batchSize * int32TBytes;
 const sLogitsSize = sizeTBytes;
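With `llamaMaxRngState` removed from the addon's exported constants (the `llama/addon/addon.cpp` change above), the state-size estimate hardcodes the historical 64 KiB value instead. Below is a minimal sketch of the arithmetic this hunk contributes to, assuming `sizeTBytes` is 8 on 64-bit platforms and ignoring the logits, embeddings, and KV-cache terms that follow in the real method; the function name and the example batch size are illustrative only.

```typescript
// Rough sketch of the per-context state-size bookkeeping touched by the diff above.
// It mirrors llama.cpp's llama_state_get_size accounting for the fields visible in
// the hunk, so it is illustrative rather than exact.
function estimateRngAndOutputStateBytes(batchSize: number): number {
    const sizeTBytes = 8;   // sizeof(size_t), assumed 64-bit
    const int32TBytes = 4;  // sizeof(int32_t)

    const sRngSize = sizeTBytes;                 // length prefix of the serialized RNG state
    const sRng = 64 * 1024;                      // LLAMA_MAX_RNG_STATE (64 KiB), now hardcoded
    const sNOutputs = sizeTBytes;                // number of outputs
    const sNOutputPos = batchSize * int32TBytes; // one int32 output position per batch slot
    const sLogitsSize = sizeTBytes;              // length prefix of the logits buffer

    return sRngSize + sRng + sNOutputs + sNOutputPos + sLogitsSize;
}

// Example: for a batch size of 512 this contributes
// 8 + 65536 + 8 + 512 * 4 + 8 = 67608 bytes, before logits, embeddings, and KV cache.
```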