diff --git a/neural_compressor/torch/algorithms/weight_only/utility.py b/neural_compressor/torch/algorithms/weight_only/utility.py
index 255c2d6db2a..eced733ca8d 100644
--- a/neural_compressor/torch/algorithms/weight_only/utility.py
+++ b/neural_compressor/torch/algorithms/weight_only/utility.py
@@ -93,12 +93,18 @@
 
 FLOAT_MAPPING = {"nf4": NF4, "fp4": FP4_BNB, "fp4_e2m1_bnb": FP4_BNB, "fp4_e2m1": FP4_E2M1}
 INT_MAPPING = {"nf4": NF4_BIT, "fp4": FP4_BNB_BIT, "fp4_e2m1_bnb": FP4_BNB_BIT, "fp4_e2m1": FP4_E2M1_BIT}
-FP8_MAPPING = {
-    "fp8_e5m2": torch.float8_e5m2,
-    "fp8_e5m2fnuz": torch.float8_e5m2fnuz,
-    "fp8_e4m3fn": torch.float8_e4m3fn,
-    "fp8_e4m3fnuz": torch.float8_e4m3fnuz,
-}
+if hasattr(torch, "float8_e5m2") and hasattr(torch, "float8_e4m3fn"):
+    FP8_MAPPING = {
+        "fp8_e5m2": torch.float8_e5m2,
+        "fp8_e4m3fn": torch.float8_e4m3fn,
+    }
+if hasattr(torch, "float8_e5m2fnuz") and hasattr(torch, "float8_e4m3fnuz"):
+    FP8_MAPPING = {
+        "fp8_e5m2": torch.float8_e5m2,
+        "fp8_e4m3fn": torch.float8_e4m3fn,
+        "fp8_e5m2fnuz": torch.float8_e5m2fnuz,
+        "fp8_e4m3fnuz": torch.float8_e4m3fnuz,
+    }
 
 
 def quantize_4bit(tensor, quantile=1.0, dtype="nf4", return_int=False, **kwargs):
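The change guards the module-level FP8_MAPPING with `hasattr` checks so the module still imports on torch builds that predate some (or all) of the float8 dtypes. As a side note, the same guard pattern can be written as a single dict comprehension; here is a minimal sketch under that assumption (the `_FP8_CANDIDATES` helper name is illustrative, not part of this PR). Unlike the two-branch version in the diff, it also leaves `FP8_MAPPING` defined, as an empty dict, when no float8 dtype exists at all.

```python
import torch

# Sketch of the guarded-mapping idea from the diff above: build the
# "fp8_*" name -> torch dtype table from whichever float8 dtypes this
# torch build actually exposes. Illustrative helper, not the PR's code.
_FP8_CANDIDATES = (
    "float8_e5m2",
    "float8_e4m3fn",
    "float8_e5m2fnuz",
    "float8_e4m3fnuz",
)

FP8_MAPPING = {
    # "float8_e5m2" -> key "fp8_e5m2", value torch.float8_e5m2, etc.
    "fp8_" + name.removeprefix("float8_"): getattr(torch, name)
    for name in _FP8_CANDIDATES
    if hasattr(torch, name)  # skip dtypes this torch version lacks
}

print(FP8_MAPPING)  # keys present depend on the installed torch version
```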