Commit

fix awq padding issue (#1790)
Signed-off-by: Mengni Wang <[email protected]>
mengniwang95 authored May 21, 2024
1 parent 4728fdc commit 903da49
Showing 1 changed file with 2 additions and 1 deletion.
neural_compressor/adaptor/ox_utils/weight_only.py (2 additions & 1 deletion)
@@ -471,7 +471,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
scales = np.clip(np.power(inp_scale, ratio) / np.power(w_scale, (1 - ratio)), 1e-4, None)
scales = scales / np.sqrt(np.max(scales) * np.min(scales))
weight = weight.T * scales
- weight = pad_tensor(weight, group_size, (org_w_shape[0] + group_size - 1) // group_size).T
+ weight = pad_tensor(weight.T, group_size, (org_w_shape[0] + group_size - 1) // group_size)

if (Version(ort.__version__) > ONNXRT1161_VERSION and num_bits == 4) or (
Version(ort.__version__) >= ONNXRT116_VERSION and num_bits == 4 and group_size == 32
@@ -485,6 +485,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
q_weight = qdq_tensor(weight, num_bits, group_size, scheme, "int") / np.expand_dims(scales, axis=-1)

+ q_weight = np.reshape(q_weight, (org_w_shape[1], -1))[:, : org_w_shape[0]]

out = np.matmul(inp, q_weight.T)
loss += np.mean(np.power((org_out - out), 2))

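Why the fix works: group-wise quantization splits the weight along its first dimension (org_w_shape[0]) into blocks of group_size rows, and pad_tensor rounds that dimension up to a whole number of blocks. The old call padded the already-transposed, scaled tensor and only transposed afterwards, so the zero rows landed on the wrong axis; the fixed call transposes back first, so the padding lands on the grouped axis, and the reshape/slice added in the second hunk strips that padding off again before the reconstruction loss is computed. Below is a minimal sketch of the padding step, assuming pad_tensor zero-pads axis 0 up to k_blocks * group_size rows (an assumption drawn from the call site, not the verbatim helper):

import numpy as np

def pad_tensor(weight, group_size, k_blocks):
    """Sketch of the padding helper: zero-pad axis 0 of `weight` so the
    row count becomes exactly k_blocks * group_size (behavior inferred
    from the call site in this diff, not copied from the repository)."""
    if group_size == -1:  # per-channel case: no grouping, nothing to pad
        return weight
    pad_len = k_blocks * group_size - weight.shape[0]
    if pad_len > 0:
        weight = np.pad(weight, ((0, pad_len), (0, 0)), mode="constant")
    return weight

# Hypothetical shapes for illustration: org_w_shape = (K, N), group_size = 32.
K, N, group_size = 70, 8, 32
w = np.random.randn(K, N).astype(np.float32)
k_blocks = (K + group_size - 1) // group_size      # 3 blocks
padded = pad_tensor(w, group_size, k_blocks)       # the fixed call pads the K axis
assert padded.shape == (k_blocks * group_size, N)  # (96, 8)

# The line added in the second hunk then drops the padding after quantization:
# q_weight = np.reshape(q_weight, (org_w_shape[1], -1))[:, : org_w_shape[0]]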