Skip to content

Commit

Permalink
k-quants : remove unnecessary tensor shape restrictions (#2811)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ggerganov authored Aug 26, 2023
1 parent 7592375 commit 04f4b1e
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions llama.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -4762,8 +4762,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s

if (name == tn(LLM_TENSOR_OUTPUT, "weight")) {
int nx = tensor->ne[0];
int ny = tensor->ne[1];
if (nx % QK_K == 0 && ny % QK_K == 0) {
if (nx % QK_K == 0) {
new_type = GGML_TYPE_Q6_K;
}
} else if (name.find("attn_v.weight") != std::string::npos) {
Expand Down Expand Up @@ -4812,8 +4811,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K) {
int nx = tensor->ne[0];
int ny = tensor->ne[1];
if (nx % QK_K != 0 || ny % QK_K != 0) {
LLAMA_LOG_INFO("\n\nTensor sizes %d x %d are not divisible by %d, required for k-quants.\n",nx,ny,QK_K);
if (nx % QK_K != 0) {
LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for k-quants\n", __func__, nx, ny, QK_K);
convert_incompatible_tensor = true;
}
}
Expand Down

0 comments on commit 04f4b1e

Please sign in to comment.