fix bug in nf4/fp4 (#1241)
Signed-off-by: Xin He <[email protected]>
xin3he authored Sep 12, 2023
1 parent 2019f41 commit 8d2bf27
Showing 3 changed files with 12 additions and 14 deletions.
2 changes: 1 addition & 1 deletion neural_compressor/adaptor/torch_utils/weight_only.py
@@ -78,7 +78,7 @@ def quantize_4bit(tensor, quantile=1.0, data_type="nf4", return_int=False):
     allow_data = FLOAT_MAPPING[data_type]
     allow_data_bit = INT_MAPPING[data_type]
     # get scale and update tensor
-    scale = tensor.max(1)[0] * quantile / max(allow_data)
+    scale = tensor.abs().max(1)[0] * quantile / max(allow_data)
     scale.unsqueeze_(dim=-1)
     tensor = tensor / scale
     mid_data = [(allow_data[i] + allow_data[i + 1]) / 2 for i in range(len(allow_data) - 1)]
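The fix is small but matters for correctness: tensor.max(1)[0] takes the signed per-row maximum, so a row whose largest-magnitude entry is negative derives its scale from the wrong element, and dividing by that scale pushes values outside the representable nf4/fp4 range (they then clamp to the end of the lookup table). tensor.abs().max(1)[0] scales by magnitude, which is what the symmetric 4-bit mapping assumes. A minimal standalone sketch of the effect; the NF4 levels below are rounded illustrations, not the exact FLOAT_MAPPING table from weight_only.py:

    import torch

    # Rounded NF4 levels for illustration; the real table is FLOAT_MAPPING["nf4"].
    NF4 = [-1.0, -0.6962, -0.5251, -0.3949, -0.2844, -0.1848, -0.0911, 0.0,
           0.0796, 0.1609, 0.2461, 0.3379, 0.4407, 0.5626, 0.7230, 1.0]

    def per_row_scale(tensor, use_abs):
        # Scale so the largest entry (by signed value or by magnitude) maps to max(NF4) == 1.0.
        row_max = tensor.abs().max(1)[0] if use_abs else tensor.max(1)[0]
        return (row_max / max(NF4)).unsqueeze(-1)

    row = torch.tensor([[-4.0, 0.5, 1.0]])            # largest magnitude is negative
    print(row / per_row_scale(row, use_abs=False))    # [[-4.0, 0.5, 1.0]]  -> -4.0 falls outside [-1, 1]
    print(row / per_row_scale(row, use_abs=True))     # [[-1.0, 0.125, 0.25]] -> everything representable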
21 changes: 10 additions & 11 deletions test/adaptor/pytorch_adaptor/test_weight_only_adaptor.py
@@ -239,31 +239,30 @@ def test_RTN_int_quant(self):
         self.assertTrue(isinstance(inc_model.model.fc1, WeightOnlyLinear))
         self.assertTrue(model_size1 / model_size2 > 2)
 
-    def test_RTN_fp4_quant(self):
-        for dtype in ["nf4", "fp4", "fp4_e2m1_bnb", "fp4_e2m1"]:
-            input = torch.randn(3, 30)
-            model = Model()
-            out1 = model(input)
+    def test_RTN_4bit_quant(self):
+        for dtype in ["int4", "nf4", "fp4", "fp4_e2m1_bnb", "fp4_e2m1"]:
+            model = copy.deepcopy(self.gptj)
+            out1 = model(self.lm_input)
             conf = PostTrainingQuantConfig(
                 approach="weight_only",
                 op_type_dict={
                     ".*": { # re.match
                         "weight": {
                             "dtype": dtype, # select from int, nf4, or fp4
                             # nf4/fp4 have fixed bits and scheme.
-                            "group_size": 32, # -1 (per-channel)
+                            "group_size": 64, # -1 (per-channel)
                             "algorithm": "RTN",
                         },
                     },
                 },
             )
             q_model = quantization.fit(model, conf)
-            out2 = q_model(input)
-            self.assertTrue(torch.all(torch.isclose(out1, out2, atol=5e-1)))
-            self.assertFalse(torch.all(out1 == out2))
+            out2 = q_model(self.lm_input)
+            self.assertTrue(torch.all(torch.isclose(out1[0], out2[0], atol=1e-1)))
+            self.assertFalse(torch.all(out1[0] == out2[0]))
             compressed_model = q_model.export_compressed_model()
-            out3 = compressed_model(input)
-            self.assertTrue(torch.all(out3 == out2))
+            out3 = compressed_model(self.lm_input)
+            self.assertTrue(torch.all(out3[0] == out2[0]))
 
     def test_AWQ_quant(self):
         conf = PostTrainingQuantConfig(
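Outside the test harness, the flow this test exercises looks roughly like the following; the tiny nn.Sequential model and tensor shapes are stand-ins (the test itself runs a GPT-J checkpoint kept in self.gptj with a cached self.lm_input), while the config mirrors the diff above:

    import torch
    from neural_compressor import PostTrainingQuantConfig, quantization

    # Stand-in model; any module with nn.Linear layers is handled the same way.
    model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU(), torch.nn.Linear(64, 8))
    example_input = torch.randn(2, 64)
    fp32_out = model(example_input)

    conf = PostTrainingQuantConfig(
        approach="weight_only",
        op_type_dict={
            ".*": {  # re.match: apply to every matching op
                "weight": {
                    "dtype": "nf4",      # or fp4 / fp4_e2m1_bnb / fp4_e2m1 / int4
                    "group_size": 64,    # -1 means per-channel
                    "algorithm": "RTN",  # round-to-nearest, no calibration data needed
                },
            },
        },
    )
    q_model = quantization.fit(model, conf)
    q4_out = q_model(example_input)
    print((fp32_out - q4_out).abs().max())  # expected to stay small; the GPT-J test checks atol=1e-1 on logits

    # Pack the fake-quantized weights into the compressed representation;
    # its outputs should match q_model exactly, as the test asserts.
    compressed_model = q_model.export_compressed_model()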
3 changes: 1 addition & 2 deletions test/algorithm/test_smooth_quant.py
@@ -780,8 +780,7 @@ def calib_func(model):
         output2 = q_model.model(input_ids)
         assert isinstance(q_model.model.fc1, SQLinearWrapper)
         # set a big atol to avoid random issue
-        print(output1, output2)
-        self.assertTrue(torch.allclose(output1, output2, atol=1e-02))
+        self.assertTrue(torch.allclose(output1, output2, atol=2e-02))
 
         q_model.save("saved_result")
         from neural_compressor.utils.pytorch import load
