feat(model downloader): use HF_TOKEN when needed (#276)
* feat(model downloader): use `HF_TOKEN` when needed
* fix: update model recommendations
giladgd authored Jul 30, 2024
1 parent e3e0994 commit 826334b
Showing 7 changed files with 247 additions and 51 deletions.
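The headline change: the model downloader now attaches a Hugging Face access token when a download actually requires one (gated or private repositories), taken from the `HF_TOKEN` environment variable. The sketch below is not the PR's implementation (the actual download work is delegated to `ipull`, bumped in `package.json` further down); it only illustrates the on-demand token logic, and `downloadModelFile` is a hypothetical helper.

```typescript
import {writeFile} from "node:fs/promises";

// Hypothetical sketch of "use HF_TOKEN when needed": probe anonymously first,
// and only attach a token if the endpoint rejects unauthenticated access.
async function downloadModelFile(url: string, destination: string): Promise<void> {
    // Probe the URL anonymously; most model repositories are public.
    const probe = await fetch(url, {method: "HEAD", redirect: "follow"});

    const headers: Record<string, string> = {};
    if (probe.status === 401 || probe.status === 403) {
        // Only reach for a token when the repository actually requires authentication.
        const token = process.env.HF_TOKEN ?? process.env.HUGGING_FACE_HUB_TOKEN;
        if (token == null)
            throw new Error(`Authentication is required for ${url}, but no HF_TOKEN is set`);

        // Gated/private Hugging Face repositories accept a standard Bearer token.
        headers.Authorization = `Bearer ${token}`;
    }

    const response = await fetch(url, {headers, redirect: "follow"});
    if (!response.ok)
        throw new Error(`Failed to download ${url}: HTTP ${response.status}`);

    // For a real model file you would stream to disk; buffering keeps the sketch short.
    await writeFile(destination, Buffer.from(await response.arrayBuffer()));
}
```

Probing anonymously first keeps public downloads working with no configuration, while gated models fail fast with a clear message when no token is available.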
1 change: 0 additions & 1 deletion llama/addon/addon.cpp
@@ -57,7 +57,6 @@ Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
 consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
 consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
 consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
-consts.Set("llamaMaxRngState", Napi::Number::New(info.Env(), LLAMA_MAX_RNG_STATE));
 consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
 consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));

173 changes: 160 additions & 13 deletions package-lock.json

Some generated files are not rendered by default.

6 changes: 3 additions & 3 deletions package.json
@@ -165,7 +165,7 @@
 "filenamify": "^6.0.0",
 "fs-extra": "^11.2.0",
 "ignore": "^5.3.1",
-"ipull": "^3.3.0",
+"ipull": "^3.6.0",
 "is-unicode-supported": "^2.0.0",
 "lifecycle-utils": "^1.4.1",
 "log-symbols": "^6.0.0",
@@ -193,7 +193,7 @@
 }
 },
 "optionalDependencies": {
-"@node-llama-cpp/win-x64-cuda": "0.1.0",
-"@node-llama-cpp/linux-x64-cuda": "0.1.0"
+"@node-llama-cpp/linux-x64-cuda": "0.1.0",
+"@node-llama-cpp/win-x64-cuda": "0.1.0"
 }
 }
46 changes: 20 additions & 26 deletions src/cli/recommendedModels.ts
@@ -8,27 +8,21 @@ export const recommendedModels: ModelRecommendation[] = [{

 fileOptions: [{
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-8B-Instruct-Q8_0.gguf"
+file: "Meta-Llama-3.1-8B-Instruct.Q8_0.gguf"
 }
 }, {
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-8B-Instruct-Q6_K_L.gguf"
+file: "Meta-Llama-3.1-8B-Instruct.Q6_K.gguf"
 }
 }, {
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-8B-Instruct-Q5_K_L.gguf"
-}
-}, {
-huggingFace: {
-model: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
-branch: "main",
-file: "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
+file: "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf"
 }
 }]
 }, {
@@ -40,33 +34,27 @@ export const recommendedModels: ModelRecommendation[] = [{

 fileOptions: [{
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-70B-Instruct-Q8_0/Meta-Llama-3.1-70B-Instruct-Q8_0-00001-of-00002.gguf"
+file: "Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2"
 }
 }, {
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-70B-Instruct-Q6_K_L/Meta-Llama-3.1-70B-Instruct-Q6_K_L-00001-of-00002.gguf"
+file: "Meta-Llama-3.1-70B-Instruct.Q6_K.gguf.part1of2"
 }
 }, {
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf"
+file: "Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf"
 }
 }, {
 huggingFace: {
-model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF",
+model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF",
 branch: "main",
-file: "Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf"
-}
-}, {
-huggingFace: {
-model: "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF",
-branch: "main",
-file: "Meta-Llama-3.1-70B-Instruct-IQ4_XS.gguf"
+file: "Meta-Llama-3.1-70B-Instruct.Q4_K_S.gguf"
 }
 }]
 }, {
@@ -82,6 +70,12 @@ export const recommendedModels: ModelRecommendation[] = [{
 branch: "main",
 file: "Meta-Llama-3.1-405B-Instruct.Q3_K_L.gguf.part1of5"
 }
+}, {
+huggingFace: {
+model: "mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF",
+branch: "main",
+file: "Meta-Llama-3.1-405B-Instruct.Q3_K_M.gguf.part1of4"
+}
 }]
 }, {
 name: "Phi 3 3.8B",
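The recommendations move from bartowski's quantizations to mradermacher's, whose larger models are published as concatenable split files (`.gguf.part1of2`, `.part1of5`) rather than llama.cpp-style `-00001-of-00002.gguf` shards. As a rough illustration of how such an entry maps to downloadable URLs, here is a small sketch: the `HuggingFaceFileOption` shape is simplified from the real `ModelRecommendation` type and the helper names are mine, while the `resolve/` URL pattern is Hugging Face's standard direct-download endpoint.

```typescript
// Simplified shape of a file option (the real ModelRecommendation type in
// src/cli/recommendedModels.ts carries more metadata).
interface HuggingFaceFileOption {
    model: string;   // e.g. "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF"
    branch: string;  // e.g. "main"
    file: string;    // e.g. "Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2"
}

// Build the direct-download URL for a file option via Hugging Face's "resolve" endpoint.
function resolveHuggingFaceUrl({model, branch, file}: HuggingFaceFileOption): string {
    return `https://huggingface.co/${model}/resolve/${encodeURI(branch)}/${encodeURI(file)}`;
}

// Expand a ".partXofY" file name into all of its sibling parts, so a downloader
// could fetch them all and join them into a single .gguf file.
function listSplitParts(file: string): string[] {
    const match = file.match(/^(?<base>.+\.gguf)\.part(?<part>\d+)of(?<total>\d+)$/);
    if (match?.groups == null)
        return [file]; // not a split file

    const base = match.groups.base;
    const total = Number(match.groups.total);
    return Array.from({length: total}, (_, i) => `${base}.part${i + 1}of${total}`);
}

// Example:
// listSplitParts("Meta-Llama-3.1-405B-Instruct.Q3_K_L.gguf.part1of5")
// returns the five file names ".part1of5" through ".part5of5".
```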
4 changes: 2 additions & 2 deletions src/gguf/insights/GgufInsights.ts
@@ -145,9 +145,9 @@ export class GgufInsights {
 const uint32TBytes = 4; // sizeof(uint32_t)
 const int32TBytes = 4; // sizeof(int32_t)

-// source: `llama_get_state_size` in `llama.cpp`
+// source: `llama_state_get_size` in `llama.cpp`
 const sRngSize = sizeTBytes;
-const sRng = this._llama._consts.llamaMaxRngState;
+const sRng = 64 * 1024; // LLAMA_MAX_RNG_STATE
 const sNOutputs = sizeTBytes;
 const sNOutputPos = batchSize * int32TBytes;
 const sLogitsSize = sizeTBytes;
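With `llamaMaxRngState` removed from the addon's exported constants (the `llama/addon/addon.cpp` change above), the state-size estimate hardcodes the historical 64 KiB value instead. Below is a minimal sketch of the arithmetic this hunk contributes to, assuming `sizeTBytes` is 8 on 64-bit platforms and ignoring the logits, embeddings, and KV-cache terms that follow in the real method; the function name and the example batch size are illustrative only.

```typescript
// Rough sketch of the per-context state-size bookkeeping touched by the diff above.
// It mirrors llama.cpp's llama_state_get_size accounting for the fields visible in
// the hunk, so it is illustrative rather than exact.
function estimateRngAndOutputStateBytes(batchSize: number): number {
    const sizeTBytes = 8;   // sizeof(size_t), assumed 64-bit
    const int32TBytes = 4;  // sizeof(int32_t)

    const sRngSize = sizeTBytes;                 // length prefix of the serialized RNG state
    const sRng = 64 * 1024;                      // LLAMA_MAX_RNG_STATE (64 KiB), now hardcoded
    const sNOutputs = sizeTBytes;                // number of outputs
    const sNOutputPos = batchSize * int32TBytes; // one int32 output position per batch slot
    const sLogitsSize = sizeTBytes;              // length prefix of the logits buffer

    return sRngSize + sRng + sNOutputs + sNOutputPos + sLogitsSize;
}

// Example: for a batch size of 512 this contributes
// 8 + 65536 + 8 + 512 * 4 + 8 = 67608 bytes, before logits, embeddings, and KV cache.
```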