Skip to content

Commit

Permalink
feat: Update llama.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
abetlen committed Oct 31, 2024
1 parent 7403e00 commit e712cff
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 27 deletions.
14 changes: 0 additions & 14 deletions llama_cpp/_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,13 +362,6 @@ def sample_min_p(self, candidates: "_LlamaTokenDataArray", p: float, min_keep: i
self.ctx, llama_cpp.byref(candidates.candidates), p, min_keep
)

def sample_tail_free(
    self, candidates: "_LlamaTokenDataArray", z: float, min_keep: int
):
    """Apply Tail-Free Sampling (parameter ``z``) to *candidates* in place.

    Thin wrapper over the C API ``llama_sample_tail_free``; ``min_keep``
    bounds how many candidates survive the filter.
    """
    cand_ref = llama_cpp.byref(candidates.candidates)
    llama_cpp.llama_sample_tail_free(self.ctx, cand_ref, z, min_keep)

def sample_typical(
self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int
):
Expand Down Expand Up @@ -685,9 +678,6 @@ def sample(
ctx_main.sample_top_k(
token_data_array, self.params.top_k, min_keep=min_keep
)
ctx_main.sample_tail_free(
token_data_array, self.params.tfs_z, min_keep=min_keep
)
ctx_main.sample_typical(
token_data_array, self.params.typical_p, min_keep=min_keep
)
Expand Down Expand Up @@ -776,10 +766,6 @@ def add_min_p(self, p: float, min_keep: int):
sampler = llama_cpp.llama_sampler_init_min_p(p, min_keep)
self._add_sampler(sampler)

def add_tail_free(self, z: float, min_keep: int):
    """Append a tail-free sampler (TFS, parameter ``z``) to this sampler chain."""
    self._add_sampler(llama_cpp.llama_sampler_init_tail_free(z, min_keep))

def add_typical(self, p: float, min_keep: int):
    """Append a locally-typical sampler (parameter ``p``) to this sampler chain."""
    self._add_sampler(llama_cpp.llama_sampler_init_typical(p, min_keep))
Expand Down
1 change: 0 additions & 1 deletion llama_cpp/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,6 @@ def apply_func(token_data_array: llama_cpp.llama_token_data_array_p):
n_probs = 0
min_keep = max(1, n_probs)
sampler.add_top_k(top_k)
sampler.add_tail_free(tfs_z, min_keep)
sampler.add_typical(typical_p, min_keep)
sampler.add_top_p(top_p, min_keep)
sampler.add_min_p(min_p, min_keep)
Expand Down
46 changes: 35 additions & 11 deletions llama_cpp/llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3191,17 +3191,6 @@ def llama_sampler_init_min_p(p: float, min_keep: int) -> llama_sampler_p:
...


# /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
# LLAMA_API struct llama_sampler * llama_sampler_init_tail_free (float z, size_t min_keep);
@ctypes_function(
    "llama_sampler_init_tail_free",
    [ctypes.c_float, ctypes.c_size_t],
    llama_sampler_p_ctypes,
)
def llama_sampler_init_tail_free(z: float, min_keep: int) -> llama_sampler_p:
    """Create a tail-free (TFS) sampler with parameter ``z``.

    The ``...`` body is intentional: ``@ctypes_function`` binds this stub to the
    native ``llama_sampler_init_tail_free`` symbol; the annotations here mirror
    the C signature quoted in the comment above.
    """
    ...


# /// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
# LLAMA_API struct llama_sampler * llama_sampler_init_typical (float p, size_t min_keep);
@ctypes_function(
Expand Down Expand Up @@ -3343,6 +3332,41 @@ def llama_sampler_init_penalties(
...


# /// @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
# LLAMA_API struct llama_sampler * llama_sampler_init_dry(
#            const struct llama_model * model,
#                              float    dry_multiplier,
#                              float    dry_base,
#                            int32_t    dry_allowed_length,
#                            int32_t    dry_penalty_last_n,
#                         const char ** seq_breakers,
#                             size_t    num_breakers);
@ctypes_function(
    "llama_sampler_init_dry",
    [
        llama_model_p_ctypes,
        ctypes.c_float,
        ctypes.c_float,
        ctypes.c_int32,
        ctypes.c_int32,
        ctypes.POINTER(ctypes.c_char_p),
        ctypes.c_size_t,
    ],
    llama_sampler_p_ctypes,
)
def llama_sampler_init_dry(
    model: llama_model_p,
    dry_multiplier: float,
    dry_base: float,
    dry_allowed_length: int,
    dry_penalty_last_n: int,
    seq_breakers: CtypesArray[bytes],
    num_breakers: int,
    /,
) -> llama_sampler_p:
    """Create a DRY (repetition-suppression) sampler.

    The ``...`` body is intentional: ``@ctypes_function`` binds this stub to the
    native ``llama_sampler_init_dry`` symbol using the argtypes list above.
    ``seq_breakers`` is a C array of ``num_breakers`` NUL-terminated byte
    strings (``const char **`` on the C side); all parameters are
    positional-only (``/``) to match the C calling convention.
    """
    ...


# LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias(
# int32_t n_vocab,
# int32_t n_logit_bias,
Expand Down
2 changes: 1 addition & 1 deletion vendor/llama.cpp
Submodule llama.cpp updated 62 files
+1 −1 ci/run.sh
+67 −13 common/arg.cpp
+4 −14 common/common.cpp
+55 −34 common/common.h
+19 −11 common/sampling.cpp
+4 −0 convert_hf_to_gguf.py
+1 −0 convert_hf_to_gguf_update.py
+3 −3 convert_lora_to_gguf.py
+22 −110 examples/llama-bench/llama-bench.cpp
+128 −42 examples/llama.vim
+33 −10 examples/main/README.md
+30 −8 examples/server/README.md
+11 −4 examples/server/public/index-new.html
+8 −2 examples/server/public/index.html
+0 −0 examples/server/public/style.css
+202 −379 examples/server/server.cpp
+36 −0 examples/server/tests/features/infill.feature
+46 −0 examples/server/tests/features/steps/steps.py
+0 −2 examples/server/themes/buttons-top/index.html
+0 −2 examples/server/themes/wild/index.html
+246 −14 examples/server/utils.hpp
+3 −3 flake.lock
+12 −7 ggml/include/ggml-backend.h
+1 −1 ggml/include/ggml-cuda.h
+4 −0 ggml/include/ggml-kompute.h
+2 −0 ggml/src/CMakeLists.txt
+268 −0 ggml/src/ggml-aarch64.c
+8 −25 ggml/src/ggml-amx.cpp
+4 −15 ggml/src/ggml-backend-impl.h
+127 −115 ggml/src/ggml-backend.cpp
+5 −15 ggml/src/ggml-blas.cpp
+6 −44 ggml/src/ggml-cann.cpp
+83 −88 ggml/src/ggml-cuda.cu
+1 −1 ggml/src/ggml-cuda/cpy.cuh
+3 −3 ggml/src/ggml-cuda/im2col.cu
+1 −3 ggml/src/ggml-cuda/mmq.cu
+238 −72 ggml/src/ggml-kompute.cpp
+178 −71 ggml/src/ggml-metal.m
+558 −196 ggml/src/ggml-metal.metal
+2 −18 ggml/src/ggml-rpc.cpp
+14 −40 ggml/src/ggml-sycl.cpp
+80 −18 ggml/src/ggml-vulkan.cpp
+94 −28 ggml/src/ggml.c
+9 −0 ggml/src/kompute-shaders/common.comp
+133 −0 ggml/src/kompute-shaders/op_mul_mat_q4_k.comp
+57 −0 ggml/src/llamafile/sgemm.cpp
+74 −0 ggml/src/vulkan-shaders/pool2d.comp
+4 −0 ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
+12 −8 include/llama.h
+1 −1 scripts/compare-llama-bench.py
+1 −1 scripts/run-with-preset.py
+7 −0 scripts/sync-ggml-am.sh
+1 −1 scripts/sync-ggml.last
+3 −0 scripts/sync-ggml.sh
+392 −96 src/llama-sampling.cpp
+18 −0 src/llama-sampling.h
+16 −0 src/llama-vocab.cpp
+5 −0 src/llama-vocab.h
+1,815 −1,686 src/llama.cpp
+100 −22 tests/test-backend-ops.cpp
+4 −4 tests/test-chat-template.cpp
+37 −21 tests/test-sampling.cpp

0 comments on commit e712cff

Please sign in to comment.