fix style
rnwang04 committed Dec 24, 2024
1 parent c37a180 commit 50ace72
Showing 4 changed files with 21 additions and 19 deletions.
2 changes: 1 addition & 1 deletion python/llm/src/ipex_llm/transformers/npu_models/common.py
@@ -111,5 +111,5 @@ def is_auto_round_model(model: torch.nn.Module):
if hasattr(model, "quantization_config"):
quant_config = getattr(model.config, "quantization_config", None)
if quant_config is not None and quant_config.quant_method == "intel/auto-round":
return True
return True
return False
@@ -28,7 +28,7 @@ def unpack_auto_round_layer(layer, qtype="sym_int4_rtn"):
n, m = layer.infeatures, layer.outfeatures
weight = layer.qweight.to("cpu")
scale = layer.scales.to("cpu")
zeros = layer.qzeros.to("cpu") # np.int32, 1 x m // 4
zeros = layer.qzeros.to("cpu") # np.int32, 1 x m // 4
bits = layer.bits

scale = scale.t().contiguous()
@@ -38,13 +38,13 @@ def unpack_auto_round_layer(layer, qtype="sym_int4_rtn"):

for i in range(0, n // num):
for j in range(0, num):
int_weight[i * num + j, :] = (( weight[i, :] >> (j * bits) ) & 0x0000000F ).to(torch.uint8)
int_weight[i*num + j, :] = ((weight[i, :] >> (j*bits)) & 0x0000000F).to(torch.uint8)

int_weight = (int_weight - 8).to(torch.int8) # n, m
qweights = int_weight.t().contiguous() # m, n
int_weight = (int_weight - 8).to(torch.int8) # n, m
qweights = int_weight.t().contiguous() # m, n

# if we want to transform it to our NPU format, uncomment below code
qweights = qweights.reshape(m, -1 , 2) # m * n/2 * 2
qweights = qweights.reshape(m, -1, 2) # m * n/2 * 2
low_bit, high_bit = qweights.split(1, dim=-1)
high_bit = high_bit.squeeze().view(torch.int8)
low_bit = low_bit.squeeze().view(torch.int8)
@@ -61,16 +61,16 @@ def unpack_auto_round_layer(layer, qtype="sym_int4_rtn"):

for i in range(0, m // num):
for j in range(0, num):
int_zero[:, i * num + j] = (( zero[:, i] >> (j * bits) ) & 0x0000000F ).to(torch.uint8)
int_zero[:, i*num + j] = ((zero[:, i] >> (j*bits)) & 0x0000000F).to(torch.uint8)

zero = int_zero.to(torch.int8)
zero = zero.t().contiguous() # m, 1
zero = zero.t().contiguous() # m, 1
zero = zero.to(torch.float32) * -1 * scale
zero += 8 * scale
else:
invalidInputError(False,
f"unpack_auto_round_layer does not support qtype {qtype}.")
return qweights.view(torch.uint8), scale.to(torch.float16), zero.to(torch.float16)
return qweights.view(torch.uint8), scale.to(torch.float16), zero.to(torch.float16)


@module_optimization
@@ -85,7 +85,7 @@ def replace_with_QuantizedLinear(layer, qtype, device, modules_to_not_convert,
# auto-round's QuantLinear
qweights, scale, zero = unpack_auto_round_layer(layer, qtype=qtype)
return QuantizedLinear(qweights, scale, zero, layer.bias,
group_size=group_size, qtype=qtype)
group_size=group_size, qtype=qtype)
elif isinstance(layer, torch.nn.Linear) and not hasattr(layer, "qtype"):
enable_scale_search = (os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0" or
os.environ.get("IPEX_LLM_NPU_QUANTIZATION_HQQ", "0") != "0")
@@ -101,9 +101,10 @@ def replace_with_QuantizedLinear(layer, qtype, device, modules_to_not_convert,
group_size=group_size, qtype=qtype)


def convert_auto_round_model_to_npu_model(model, save_directory, max_context_len = 1024, max_prompt_len = 960,
transpose_value_cache = True, fuse_layers = None, mixed_precision = False,
inter_pp = None, intra_pp = None, optimize_model=True):
def convert_auto_round_model_to_npu_model(model, save_directory, max_context_len=1024,
max_prompt_len=960, transpose_value_cache=True,
fuse_layers=None, mixed_precision=False,
inter_pp=None, intra_pp=None, optimize_model=True):
quant_config = getattr(model.config, "quantization_config", None)
if quant_config is None and quant_config.quant_method != "intel/auto-round":
exit(-1)
Expand All @@ -112,16 +113,16 @@ def convert_auto_round_model_to_npu_model(model, save_directory, max_context_len
group_size = quant_config.group_size
sym = quant_config.sym

if sym and bits == 4 :
if sym and bits == 4:
qtype = "sym_int4_rtn"
elif not sym and bits == 4:
qtype = "asym_int4_rtn"
elif sym and bits == 4:
elif sym and bits == 4:
qtype = "sym_int8_rtn"
else:
invalidInputError(False,
"Invalid dtype.")
"Invalid dtype.")

if group_size == -1:
quantization_group_size = 0
else:
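The hunks above are the core of the auto-round unpacking: each int32 in qweight holds 32 // bits packed values, the inner loop shifts the j-th nibble down and masks it with 0x0F, and the symmetric path then subtracts 8 to move the range from [0, 15] to [-8, 7] before transposing to (m, n). Below is a minimal, self-contained sketch of that scheme; the allocation of int_weight, the value of num, and the tail of the nibble-pair repack are cut off in the hunks, so the function names and the final OR-combine used here are assumptions for illustration, not the library's actual code.

import torch

def unpack_int4_sketch(qweight: torch.Tensor, bits: int = 4) -> torch.Tensor:
    # qweight: (n // 8, m) int32, eight 4-bit values packed per element (assumed layout)
    per_word = 32 // bits                      # plays the role of `num` in the diff
    n = qweight.shape[0] * per_word
    m = qweight.shape[1]
    int_weight = torch.empty(n, m, dtype=torch.uint8)
    for i in range(qweight.shape[0]):
        for j in range(per_word):
            # shift the j-th nibble down and mask it out, as in the diff
            int_weight[i * per_word + j, :] = ((qweight[i, :] >> (j * bits)) & 0x0000000F).to(torch.uint8)
    # symmetric int4: stored values are offset by 8, so map [0, 15] -> [-8, 7]
    signed = (int_weight.to(torch.int16) - 8).to(torch.int8)
    return signed.t().contiguous()             # (m, n)

def repack_two_per_byte(signed: torch.Tensor) -> torch.Tensor:
    # Hypothetical tail of the reshape(m, -1, 2) / split(1, dim=-1) step:
    # put each low/high nibble pair back into one uint8 for the NPU layout.
    pairs = signed.reshape(signed.shape[0], -1, 2)
    low, high = pairs.split(1, dim=-1)
    low = low.squeeze(-1).contiguous().view(torch.uint8) & 0x0F
    high = (high.squeeze(-1).contiguous().view(torch.uint8) & 0x0F) << 4
    return high | low                          # (m, n // 2)

For the asymmetric branch the same shift-and-mask runs over qzeros, and the zero point is folded into an additive bias: assuming the usual scale * (q - zp) dequantization, scale * (q - zp) = scale * (q - 8) + (8 - zp) * scale, so computing zero = -zp * scale + 8 * scale (as the hunk does) lets asym_int4 reuse the symmetric kernel plus a per-output offset.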
@@ -207,7 +207,7 @@ def convert_llm(model: torch.nn.Module,
n_splits_down_proj = 1
else:
if is_auto_round_model(model):
n_splits_down_proj = 1 # for auto-round
n_splits_down_proj = 1 # for auto-round
else:
n_splits_down_proj = 2 if (model.config.intermediate_size == 18944 or
os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
@@ -308,7 +308,8 @@ def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down
weights.append((torch.stack(l_weights, axis=0), torch.stack(scales, axis=0),
torch.stack(zeros, axis=0)))
else:
weights.append((torch.stack(l_weights, axis=0), torch.stack(scales, axis=0)))
weights.append((torch.stack(l_weights, axis=0),
torch.stack(scales, axis=0)))
else:
for layer in [attn_layer.q_proj, attn_layer.k_proj,
attn_layer.v_proj, attn_layer.o_proj,
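The last hunk only re-wraps a long weights.append(...) call for line length, but the call itself is worth seeing: the per-split quantized weights and scales of one projection are stacked along a new leading axis so the fused layer can address split k by indexing dimension 0. A tiny sketch with made-up shapes (the real split counts, dtypes, and group sizes come from the surrounding converter and are assumptions here):

import torch

n_splits, out_f, in_f_per_split = 2, 8, 16
l_weights = [torch.randint(0, 256, (out_f, in_f_per_split), dtype=torch.uint8)
             for _ in range(n_splits)]
scales = [torch.rand(out_f, dtype=torch.float16) for _ in range(n_splits)]

# torch.stack adds the leading "split" axis:
# weights -> (n_splits, out_f, in_f_per_split), scales -> (n_splits, out_f)
packed = (torch.stack(l_weights, dim=0), torch.stack(scales, dim=0))
print(packed[0].shape, packed[1].shape)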
