Skip to content

Commit

Permalink
llama : fix command-r inference when omitting outputs (#6367)
Browse files Browse the repository at this point in the history
  • Loading branch information
compilade authored Mar 28, 2024
1 parent 28cb9a0 commit 0308f5e
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9152,8 +9152,9 @@ struct llm_build_context {
if (il == n_layer - 1) {
// skip computing output for unused tokens
struct ggml_tensor * inp_out_ids = build_inp_out_ids();
-        cur  = ggml_get_rows(ctx0,  cur, inp_out_ids);
-        inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+        cur     = ggml_get_rows(ctx0,     cur, inp_out_ids);
+        inpL    = ggml_get_rows(ctx0,    inpL, inp_out_ids);
+        ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
}

struct ggml_tensor * attn_out = cur;
Expand Down

0 comments on commit 0308f5e

Please sign in to comment.