From f6250b8df4cf494419ea5ae6db500bef1f60d1e3 Mon Sep 17 00:00:00 2001 From: crasm Date: Thu, 23 Nov 2023 15:15:53 -0500 Subject: [PATCH 1/3] convert.py : make script executable --- convert.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 convert.py diff --git a/convert.py b/convert.py old mode 100644 new mode 100755 From 989c85f9867cb39fbcabcef124508c64aea19b4f Mon Sep 17 00:00:00 2001 From: crasm Date: Thu, 23 Nov 2023 15:16:11 -0500 Subject: [PATCH 2/3] llama : fix doc for yarn_ext_factor unspecified value --- common/common.cpp | 2 +- llama.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 1dcc235eac0e6..56f070345fc2a 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -792,7 +792,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)\n"); printf(" --rope-freq-scale N RoPE frequency scaling factor, expands context by a factor of 1/N\n"); printf(" --yarn-orig-ctx N YaRN: original context size of model (default: 0 = model training context size)\n"); - printf(" --yarn-ext-factor N YaRN: extrapolation mix factor (default: 1.0, 0.0 = full interpolation)\n"); + printf(" --yarn-ext-factor N YaRN: extrapolation mix factor (default: 1.0, 0.0 = full interpolation, <0.0 = not specified)\n"); printf(" --yarn-attn-factor N YaRN: scale sqrt(t) or attention magnitude (default: 1.0)\n"); printf(" --yarn-beta-slow N YaRN: high correction dim or alpha (default: %.1f)\n", params.yarn_beta_slow); printf(" --yarn-beta-fast N YaRN: low correction dim or beta (default: %.1f)\n", params.yarn_beta_fast); diff --git a/llama.h b/llama.h index 1a62058d1406b..3cc71a24824bd 100644 --- a/llama.h +++ b/llama.h @@ -185,7 +185,7 @@ extern "C" { // ref: https://github.com/ggerganov/llama.cpp/pull/2054 float rope_freq_base; // RoPE base frequency, 0 = from model float rope_freq_scale; // RoPE frequency scaling factor, 0 = from model - float yarn_ext_factor; // YaRN extrapolation mix factor, NaN = from model + float yarn_ext_factor; // YaRN extrapolation mix factor, <0.0 = from model float yarn_attn_factor; // YaRN magnitude scaling factor float yarn_beta_fast; // YaRN low correction dim float yarn_beta_slow; // YaRN high correction dim From 2da27621a7fa9ca30e6f235a1c049c4a120e568c Mon Sep 17 00:00:00 2001 From: crasm Date: Fri, 24 Nov 2023 00:25:32 -0500 Subject: [PATCH 3/3] llama : updates from code review --- common/common.cpp | 2 +- llama.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 56f070345fc2a..1dcc235eac0e6 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -792,7 +792,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)\n"); printf(" --rope-freq-scale N RoPE frequency scaling factor, expands context by a factor of 1/N\n"); printf(" --yarn-orig-ctx N YaRN: original context size of model (default: 0 = model training context size)\n"); - printf(" --yarn-ext-factor N YaRN: extrapolation mix factor (default: 1.0, 0.0 = full interpolation, <0.0 = not specified)\n"); + printf(" --yarn-ext-factor N YaRN: extrapolation mix factor (default: 1.0, 0.0 = full interpolation)\n"); printf(" --yarn-attn-factor N YaRN: scale sqrt(t) or attention magnitude (default: 1.0)\n"); printf(" --yarn-beta-slow N YaRN: high correction dim or alpha (default: %.1f)\n", params.yarn_beta_slow); printf(" --yarn-beta-fast N YaRN: low correction dim or beta (default: %.1f)\n", params.yarn_beta_fast); diff --git a/llama.h b/llama.h index 3cc71a24824bd..89cb6198e84b8 100644 --- a/llama.h +++ b/llama.h @@ -185,7 +185,7 @@ extern "C" { // ref: https://github.com/ggerganov/llama.cpp/pull/2054 float rope_freq_base; // RoPE base frequency, 0 = from model float rope_freq_scale; // RoPE frequency scaling factor, 0 = from model - float yarn_ext_factor; // YaRN extrapolation mix factor, <0.0 = from model + float yarn_ext_factor; // YaRN extrapolation mix factor, negative = from model float yarn_attn_factor; // YaRN magnitude scaling factor float yarn_beta_fast; // YaRN low correction dim float yarn_beta_slow; // YaRN high correction dim