From 826334b33a793f0485bd869de805a897199fae28 Mon Sep 17 00:00:00 2001 From: "Gilad S." Date: Tue, 30 Jul 2024 04:18:57 +0300 Subject: [PATCH] feat(model downloader): use `HF_TOKEN` when needed (#276) * feat(model downloader): use `HF_TOKEN` when needed * fix: update model recommendations --- llama/addon/addon.cpp | 1 - package-lock.json | 173 ++++++++++++++++-- package.json | 6 +- src/cli/recommendedModels.ts | 46 ++--- src/gguf/insights/GgufInsights.ts | 4 +- src/utils/createModelDownloader.ts | 66 ++++++- .../electron-typescript-react/src/App/App.tsx | 2 +- 7 files changed, 247 insertions(+), 51 deletions(-) diff --git a/llama/addon/addon.cpp b/llama/addon/addon.cpp index 83b2b503..4de59653 100644 --- a/llama/addon/addon.cpp +++ b/llama/addon/addon.cpp @@ -57,7 +57,6 @@ Napi::Value addonGetConsts(const Napi::CallbackInfo& info) { consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16))); consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32))); consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead())); - consts.Set("llamaMaxRngState", Napi::Number::New(info.Env(), LLAMA_MAX_RNG_STATE)); consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos))); consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id))); diff --git a/package-lock.json b/package-lock.json index 3a0ea8af..b8f689bf 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,7 +22,7 @@ "filenamify": "^6.0.0", "fs-extra": "^11.2.0", "ignore": "^5.3.1", - "ipull": "^3.3.0", + "ipull": "^3.6.0", "is-unicode-supported": "^2.0.0", "lifecycle-utils": "^1.4.1", "log-symbols": "^6.0.0", @@ -88,6 +88,10 @@ "type": "github", "url": "https://github.com/sponsors/giladgd" }, + "optionalDependencies": { + "@node-llama-cpp/linux-x64-cuda": "0.1.0", + "@node-llama-cpp/win-x64-cuda": "0.1.0" + }, "peerDependencies": { "typescript": ">=5.0.0" }, @@ -1915,6 +1919,154 @@ 
"integrity": "sha512-j7P6Rgr3mmtdkeDGTe0E/aYyWEWVtc5yFXtHCRHs28/jptDEWfaVOc5T7cblqy1XKPPfCxJc/8DwQ5YgLOZOVQ==", "dev": true }, + "node_modules/@reflink/reflink": { + "version": "0.1.16", + "resolved": "https://registry.npmjs.org/@reflink/reflink/-/reflink-0.1.16.tgz", + "integrity": "sha512-i2zYt2FH1CE/1HUwK96HcwiahGhaS4wSCgaUnlIrl/4bxTnaZ0T/sYcLJ5VNSrbuczWjtyJ4WUROB+qMcRI9jA==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@reflink/reflink-darwin-arm64": "0.1.16", + "@reflink/reflink-darwin-x64": "0.1.16", + "@reflink/reflink-linux-arm64-gnu": "0.1.16", + "@reflink/reflink-linux-arm64-musl": "0.1.16", + "@reflink/reflink-linux-x64-gnu": "0.1.16", + "@reflink/reflink-linux-x64-musl": "0.1.16", + "@reflink/reflink-win32-arm64-msvc": "0.1.16", + "@reflink/reflink-win32-x64-msvc": "0.1.16" + } + }, + "node_modules/@reflink/reflink-darwin-arm64": { + "version": "0.1.16", + "resolved": "https://registry.npmjs.org/@reflink/reflink-darwin-arm64/-/reflink-darwin-arm64-0.1.16.tgz", + "integrity": "sha512-s61AeZ0br2LtqOl2Rbq0k833hQ00sXJ+l9LGJmjM53dupWft3HEX9C5WUIMDDiU2Scx7f7UKAE4DvIvv7XjBWQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@reflink/reflink-darwin-x64": { + "version": "0.1.16", + "resolved": "https://registry.npmjs.org/@reflink/reflink-darwin-x64/-/reflink-darwin-x64-0.1.16.tgz", + "integrity": "sha512-ssrJj3K0Euua2LAkA4ff5y693wGKUHfznrGeWWtMw2aoLZRAH+C9Ne5oQvmcPPEK6wa929nRhA0ABrvhUa9mvA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@reflink/reflink-linux-arm64-gnu": { + "version": "0.1.16", + "resolved": "https://registry.npmjs.org/@reflink/reflink-linux-arm64-gnu/-/reflink-linux-arm64-gnu-0.1.16.tgz", + "integrity": 
"sha512-I4PCAcsAKFRSfOSHdz+rck6ARg4jzo4PvVqcnS2odcXy1Inbehxk3IcKBpHnuuDbXRCUoWV6NP7wSx1wG7ZBuA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@reflink/reflink-linux-arm64-musl": { + "version": "0.1.16", + "resolved": "https://registry.npmjs.org/@reflink/reflink-linux-arm64-musl/-/reflink-linux-arm64-musl-0.1.16.tgz", + "integrity": "sha512-xzcdtfwTXWUzN5yHdJgCdyAZSBO0faSgTqGdT4QKDxGHmiokf7+tgVBd6bU2nT4sL26AiIFyIBwp8buXGQYyaw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@reflink/reflink-linux-x64-gnu": { + "version": "0.1.16", + "resolved": "https://registry.npmjs.org/@reflink/reflink-linux-x64-gnu/-/reflink-linux-x64-gnu-0.1.16.tgz", + "integrity": "sha512-4/jscn1A/hx6maOowUjcvIs7YBs0fj//1vxB16TdMYk3tH9FHNmMBv5Pvw8eeRDimAzHP9fQJ9/t4dR6HCf32w==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@reflink/reflink-linux-x64-musl": { + "version": "0.1.16", + "resolved": "https://registry.npmjs.org/@reflink/reflink-linux-x64-musl/-/reflink-linux-x64-musl-0.1.16.tgz", + "integrity": "sha512-03kRXoAXhS/ZKxU2TKax59mLyKP7mev0EoIs+yXejUQo6D4uU46j+Sc243xMp72jRTgbWV4hQykcov98KtXEKQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@reflink/reflink-win32-arm64-msvc": { + "version": "0.1.16", + "resolved": "https://registry.npmjs.org/@reflink/reflink-win32-arm64-msvc/-/reflink-win32-arm64-msvc-0.1.16.tgz", + "integrity": "sha512-N7r+6YB3vXijs7PF3eg306B5s82hGS2TzsMM4+B9DNN9sbvN2yV5HQw29zyCXHY9c9SLe5kEzERp0rsDtN+6TA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + 
"node_modules/@reflink/reflink-win32-x64-msvc": { + "version": "0.1.16", + "resolved": "https://registry.npmjs.org/@reflink/reflink-win32-x64-msvc/-/reflink-win32-x64-msvc-0.1.16.tgz", + "integrity": "sha512-CaslGjfhpvtjHqr8Cw1MhkYZAkcLWFiL1pMXOPv4fwngtLC5/OlcL/Y4Rw2QEZwDvPG3gaeY7pjF1NYEGnDrZA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@rollup/rollup-android-arm-eabi": { "version": "4.18.0", "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.18.0.tgz", @@ -2740,14 +2892,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/@supercharge/promise-pool": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/@supercharge/promise-pool/-/promise-pool-3.2.0.tgz", - "integrity": "sha512-pj0cAALblTZBPtMltWOlZTQSLT07jIaFNeM8TWoJD1cQMgDB9mcMlVMoetiB35OzNJpqQ2b+QEtwiR9f20mADg==", - "engines": { - "node": ">=8" - } - }, "node_modules/@tinyhttp/content-disposition": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/@tinyhttp/content-disposition/-/content-disposition-2.2.0.tgz", @@ -7035,11 +7179,11 @@ } }, "node_modules/ipull": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/ipull/-/ipull-3.3.0.tgz", - "integrity": "sha512-Q90FhMmHXxVTc2jCfbjJIU8JaaymoT6a6G0K6vemEGUa8PTt24LY5fVCH//KSMrJNbZmJSQ0lRt1ac3iwo8gOA==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/ipull/-/ipull-3.6.0.tgz", + "integrity": "sha512-oPN5iXxZ9xB3mHGS8zDZVJLgcXCSxy4lPNiNBrVVLDWaSJ7XlJIkn2CQRR5B8GI+aVi4HsiIfvXoJY402A+2pg==", + "license": "MIT", "dependencies": { - "@supercharge/promise-pool": "^3.2.0", "@tinyhttp/content-disposition": "^2.2.0", "async-retry": "^1.3.3", "chalk": "^5.3.0", @@ -7068,6 +7212,9 @@ "funding": { "type": "github", "url": "https://github.com/ido-pluto/ipull?sponsor=1" + }, + "optionalDependencies": { + "@reflink/reflink": "^0.1.16" } }, 
"node_modules/ipull/node_modules/parse-ms": { diff --git a/package.json b/package.json index 707a30f8..99f7459e 100644 --- a/package.json +++ b/package.json @@ -165,7 +165,7 @@ "filenamify": "^6.0.0", "fs-extra": "^11.2.0", "ignore": "^5.3.1", - "ipull": "^3.3.0", + "ipull": "^3.6.0", "is-unicode-supported": "^2.0.0", "lifecycle-utils": "^1.4.1", "log-symbols": "^6.0.0", @@ -193,7 +193,7 @@ } }, "optionalDependencies": { - "@node-llama-cpp/win-x64-cuda": "0.1.0", - "@node-llama-cpp/linux-x64-cuda": "0.1.0" + "@node-llama-cpp/linux-x64-cuda": "0.1.0", + "@node-llama-cpp/win-x64-cuda": "0.1.0" } } diff --git a/src/cli/recommendedModels.ts b/src/cli/recommendedModels.ts index 9560a660..f1c02b93 100644 --- a/src/cli/recommendedModels.ts +++ b/src/cli/recommendedModels.ts @@ -8,27 +8,21 @@ export const recommendedModels: ModelRecommendation[] = [{ fileOptions: [{ huggingFace: { - model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", + model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF", branch: "main", - file: "Meta-Llama-3.1-8B-Instruct-Q8_0.gguf" + file: "Meta-Llama-3.1-8B-Instruct.Q8_0.gguf" } }, { huggingFace: { - model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", + model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF", branch: "main", - file: "Meta-Llama-3.1-8B-Instruct-Q6_K_L.gguf" + file: "Meta-Llama-3.1-8B-Instruct.Q6_K.gguf" } }, { huggingFace: { - model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", + model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF", branch: "main", - file: "Meta-Llama-3.1-8B-Instruct-Q5_K_L.gguf" - } - }, { - huggingFace: { - model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf" + file: "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf" } }] }, { @@ -40,33 +34,27 @@ export const recommendedModels: ModelRecommendation[] = [{ fileOptions: [{ huggingFace: { - model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", + model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF", branch: "main", - 
file: "Meta-Llama-3.1-70B-Instruct-Q8_0/Meta-Llama-3.1-70B-Instruct-Q8_0-00001-of-00002.gguf" + file: "Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2" } }, { huggingFace: { - model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", + model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF", branch: "main", - file: "Meta-Llama-3.1-70B-Instruct-Q6_K_L/Meta-Llama-3.1-70B-Instruct-Q6_K_L-00001-of-00002.gguf" + file: "Meta-Llama-3.1-70B-Instruct.Q6_K.gguf.part1of2" } }, { huggingFace: { - model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", + model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF", branch: "main", - file: "Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf" + file: "Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf" } }, { huggingFace: { - model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", + model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF", branch: "main", - file: "Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf" - } - }, { - huggingFace: { - model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-70B-Instruct-IQ4_XS.gguf" + file: "Meta-Llama-3.1-70B-Instruct.Q4_K_S.gguf" } }] }, { @@ -82,6 +70,12 @@ export const recommendedModels: ModelRecommendation[] = [{ branch: "main", file: "Meta-Llama-3.1-405B-Instruct.Q3_K_L.gguf.part1of5" } + }, { + huggingFace: { + model: "mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF", + branch: "main", + file: "Meta-Llama-3.1-405B-Instruct.Q3_K_M.gguf.part1of4" + } }] }, { name: "Phi 3 3.8B", diff --git a/src/gguf/insights/GgufInsights.ts b/src/gguf/insights/GgufInsights.ts index 60e3d5da..e6e4bca3 100644 --- a/src/gguf/insights/GgufInsights.ts +++ b/src/gguf/insights/GgufInsights.ts @@ -145,9 +145,9 @@ export class GgufInsights { const uint32TBytes = 4; // sizeof(uint32_t) const int32TBytes = 4; // sizeof(int32_t) - // source: `llama_get_state_size` in `llama.cpp` + // source: `llama_state_get_size` in `llama.cpp` const sRngSize = sizeTBytes; - const sRng = 
this._llama._consts.llamaMaxRngState; + const sRng = 64 * 1024; // LLAMA_MAX_RNG_STATE const sNOutputs = sizeTBytes; const sNOutputPos = batchSize * int32TBytes; const sLogitsSize = sizeTBytes; diff --git a/src/utils/createModelDownloader.ts b/src/utils/createModelDownloader.ts index ba6b55dd..22fe5c51 100644 --- a/src/utils/createModelDownloader.ts +++ b/src/utils/createModelDownloader.ts @@ -1,5 +1,6 @@ import process from "process"; import path from "path"; +import os from "os"; import {DownloadEngineMultiDownload, DownloadEngineNodejs, downloadFile, downloadSequence} from "ipull"; import fs from "fs-extra"; import {normalizeGgufDownloadUrl} from "../gguf/utils/normalizeGgufDownloadUrl.js"; @@ -46,7 +47,11 @@ export type ModelDownloaderOptions = { * * Defaults to `4`. */ - parallelDownloads?: number + parallelDownloads?: number, + + tokens?: { + huggingFace?: string + } }; /** @@ -95,6 +100,7 @@ export class ModelDownloader { /** @internal */ private readonly _headers?: Record; /** @internal */ private readonly _showCliProgress: boolean; /** @internal */ private readonly _onProgress?: ModelDownloaderOptions["onProgress"]; + /** @internal */ private readonly _tokens?: ModelDownloaderOptions["tokens"]; /** @internal */ private readonly _deleteTempFileOnCancel: boolean; /** @internal */ private readonly _skipExisting: boolean; /** @internal */ private readonly _parallelDownloads: number; @@ -104,10 +110,11 @@ export class ModelDownloader { /** @internal */ private _entrypointFilename?: string; /** @internal */ private _splitBinaryParts?: number; /** @internal */ private _totalFiles?: number; + /** @internal */ private _tryHeaders: Record[] = []; private constructor({ modelUrl, dirPath = cliModelsDirectory, fileName, headers, showCliProgress = false, onProgress, deleteTempFileOnCancel = true, - skipExisting = true, parallelDownloads = 4 + skipExisting = true, parallelDownloads = 4, tokens }: ModelDownloaderOptions) { if (modelUrl == null || dirPath == null) throw 
new Error("modelUrl and dirPath cannot be null"); @@ -121,6 +128,7 @@ export class ModelDownloader { this._deleteTempFileOnCancel = deleteTempFileOnCancel; this._skipExisting = skipExisting; this._parallelDownloads = parallelDownloads; + this._tokens = tokens; this._onDownloadProgress = this._onDownloadProgress.bind(this); } @@ -247,8 +255,29 @@ export class ModelDownloader { }); } + /** @internal */ + private async resolveTryHeaders() { + if (this._tokens == null) + return; + + const {huggingFace} = this._tokens; + + const [ + hfToken + ] = await Promise.all([ + resolveHfToken(huggingFace) + ]); + + if (hfToken != null && hfToken !== "") + this._tryHeaders?.push({ + ...(this._headers ?? {}), + "Authorization": `Bearer ${hfToken}` + }); + } + /** @internal */ public async _init() { + await this.resolveTryHeaders(); const binarySplitPartUrls = resolveBinarySplitGgufPartUrls(this._modelUrl); await fs.ensureDir(this._dirPath); @@ -258,7 +287,8 @@ export class ModelDownloader { directory: this._dirPath, fileName: this._fileName ?? getFilenameForBinarySplitGgufPartUrls(binarySplitPartUrls), cliProgress: this._showCliProgress, - headers: this._headers ?? {} + headers: this._headers ?? {}, + tryHeaders: this._tryHeaders.slice() }); this._specificFileDownloaders.push(this._downloader); @@ -279,7 +309,8 @@ export class ModelDownloader { directory: this._dirPath, fileName: this._fileName ?? undefined, cliProgress: this._showCliProgress, - headers: this._headers ?? {} + headers: this._headers ?? {}, + tryHeaders: this._tryHeaders.slice() }); this._specificFileDownloaders.push(this._downloader); @@ -298,7 +329,8 @@ export class ModelDownloader { fileName: this._fileName != null ? createSplitPartFilename(this._fileName, index + 1, splitGgufPartUrls.length) : undefined, - headers: this._headers ?? {} + headers: this._headers ?? 
{},
+                    tryHeaders: this._tryHeaders.slice()
                 }));
 
             this._downloader = await downloadSequence(
@@ -325,3 +357,27 @@ export class ModelDownloader {
         return new ModelDownloader(options);
     }
 }
+
+async function resolveHfToken(providedToken?: string) {
+    if (providedToken != null)
+        return providedToken;
+
+    if (process.env.HF_TOKEN != null)
+        return process.env.HF_TOKEN;
+
+    const hfHomePath = process.env.HF_HOME ||
+        path.join(process.env.XDG_CACHE_HOME || path.join(os.homedir(), ".cache"), "huggingface");
+
+    const hfTokenPath = process.env.HF_TOKEN_PATH || path.join(hfHomePath, "token");
+    try {
+        if (await fs.pathExists(hfTokenPath)) {
+            const token = (await fs.readFile(hfTokenPath, "utf8")).trim();
+            if (token !== "")
+                return token;
+        }
+    } catch (err) {
+        // best effort: an unreadable token file is treated as "no token" rather than an error
+    }
+
+    return undefined;
+}
diff --git a/templates/electron-typescript-react/src/App/App.tsx b/templates/electron-typescript-react/src/App/App.tsx
index 79d847c2..554243ed 100644
--- a/templates/electron-typescript-react/src/App/App.tsx
+++ b/templates/electron-typescript-react/src/App/App.tsx
@@ -119,7 +119,7 @@ export function App() {
+ href="https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf">
Get Llama 3.1 8B model