Skip to content

Commit

Permalink
feat: sync llama.cpp (#62)
Browse files Browse the repository at this point in the history
* feat: sync llama.cpp

* fix: revert commit

* fix: windows build
Loading branch information
jhen0409 authored Dec 31, 2024
1 parent 1fc4c92 commit 5cfa9d6
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 18 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ project (llama-node)

set(CMAKE_CXX_STANDARD 17)

execute_process(COMMAND
git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/ggml-cpu-CMakeLists.txt.patch
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)

if(NOT DEFINED napi_build_version)
set(napi_build_version 6)
endif()
Expand Down
13 changes: 13 additions & 0 deletions scripts/ggml-cpu-CMakeLists.txt.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
index 683b90af..e1bf104c 100644
--- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
@@ -80,7 +80,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
message(STATUS "ARM detected")

if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
- message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
+ list(APPEND ARCH_FLAGS /arch:armv8.7)
else()
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
2 changes: 1 addition & 1 deletion src/LlamaCompletionWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void LlamaCompletionWorker::Execute() {

auto sparams = llama_sampler_chain_default_params();

LlamaCppSampling sampling{common_sampler_init(model, _params.sparams),
LlamaCppSampling sampling{common_sampler_init(model, _params.sampling),
common_sampler_free};

std::vector<llama_token> prompt_tokens =
Expand Down
31 changes: 15 additions & 16 deletions src/LlamaContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,29 +196,28 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
.ThrowAsJavaScriptException();
}
params.n_predict = get_option<int32_t>(options, "n_predict", -1);
params.sparams.temp = get_option<float>(options, "temperature", 0.80f);
params.sparams.top_k = get_option<int32_t>(options, "top_k", 40);
params.sparams.top_p = get_option<float>(options, "top_p", 0.95f);
params.sparams.min_p = get_option<float>(options, "min_p", 0.05f);
params.sparams.mirostat = get_option<int32_t>(options, "mirostat", 0.00f);
params.sparams.mirostat_tau =
params.sampling.temp = get_option<float>(options, "temperature", 0.80f);
params.sampling.top_k = get_option<int32_t>(options, "top_k", 40);
params.sampling.top_p = get_option<float>(options, "top_p", 0.95f);
params.sampling.min_p = get_option<float>(options, "min_p", 0.05f);
params.sampling.mirostat = get_option<int32_t>(options, "mirostat", 0.00f);
params.sampling.mirostat_tau =
get_option<float>(options, "mirostat_tau", 5.00f);
params.sparams.mirostat_eta =
params.sampling.mirostat_eta =
get_option<float>(options, "mirostat_eta", 0.10f);
params.sparams.penalty_last_n =
params.sampling.penalty_last_n =
get_option<int32_t>(options, "penalty_last_n", 64);
params.sparams.penalty_repeat =
params.sampling.penalty_repeat =
get_option<float>(options, "penalty_repeat", 1.00f);
params.sparams.penalty_freq =
params.sampling.penalty_freq =
get_option<float>(options, "penalty_freq", 0.00f);
params.sparams.penalty_present =
params.sampling.penalty_present =
get_option<float>(options, "penalty_present", 0.00f);
params.sparams.penalize_nl = get_option<bool>(options, "penalize_nl", false);
params.sparams.typ_p = get_option<float>(options, "typical_p", 1.00f);
params.sparams.ignore_eos = get_option<float>(options, "ignore_eos", false);
params.sparams.grammar = get_option<std::string>(options, "grammar", "");
params.sampling.typ_p = get_option<float>(options, "typical_p", 1.00f);
params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
params.sampling.grammar = get_option<std::string>(options, "grammar", "");
params.n_keep = get_option<int32_t>(options, "n_keep", 0);
params.sparams.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
std::vector<std::string> stop_words;
if (options.Has("stop") && options.Get("stop").IsArray()) {
auto stop_words_array = options.Get("stop").As<Napi::Array>();
Expand Down
2 changes: 1 addition & 1 deletion src/llama.cpp
Submodule llama.cpp updated 400 files

0 comments on commit 5cfa9d6

Please sign in to comment.