Commit 595d3a1
support falcon woq quantization (#1280)
Signed-off-by: Xin He <[email protected]>
xin3he authored Sep 25, 2023
1 parent dffcfe1 commit 595d3a1
Showing 1 changed file with 2 additions and 1 deletion.
neural_compressor/adaptor/pytorch.py (2 additions & 1 deletion)
@@ -1213,6 +1213,7 @@ def _get_quantizable_ops(self, model):
             self.use_bf16
             and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1")
             and (self.version.release >= Version("1.11.0").release)
+            and self.approach != "post_training_weight_only"
         ):
             self.bf16_ops = self.query_handler.get_op_types_by_precision("bf16")
             bf16_ops = []
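The added condition keeps weight-only quantization (WOQ) from also registering BF16 mixed-precision ops. Below is a minimal, hypothetical sketch of that guard: `collect_bf16_ops` and its arguments are stand-ins for the adaptor's fields and `query_handler` call, not Neural Compressor's actual API.

import os

from packaging.version import Version


def collect_bf16_ops(use_bf16: bool, approach: str, torch_version: str) -> list:
    """Return BF16-capable op types, except under weight-only quantization."""
    bf16_available = os.getenv("FORCE_BF16") == "1"  # the real code also checks CpuInfo().bf16
    if (
        use_bf16
        and bf16_available
        and Version(torch_version).release >= Version("1.11.0").release
        and approach != "post_training_weight_only"  # the condition this commit adds
    ):
        # placeholder for query_handler.get_op_types_by_precision("bf16")
        return ["Linear", "Conv2d"]
    return []  # WOQ keeps compute in the original dtype


os.environ["FORCE_BF16"] = "1"
assert collect_bf16_ops(True, "post_training_weight_only", "2.0.1") == []
assert collect_bf16_ops(True, "post_training_static_quant", "2.0.1") == ["Linear", "Conv2d"]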
@@ -4817,7 +4818,7 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
 
         module_dict = dict(model.named_modules())
         for op_name, child in module_dict.items():
-            if type(child) in self.white_list:
+            if isinstance(child, tuple(self.white_list)):
                 quantizable_ops.append((op_name, str(child.__class__.__name__)))
 
     @dump_elapsed_time("Pass query framework capability")
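The `isinstance` change is what enables Falcon: a module that merely subclasses `torch.nn.Linear` (as Falcon's custom linear layers reportedly do) is missed by an exact `type()` lookup but matched by `isinstance`. A small self-contained sketch, with `FalconLinear` as a stand-in for that subclass; `isinstance` needs a class or tuple of classes, hence `tuple(white_list)`:

import torch.nn as nn


class FalconLinear(nn.Linear):
    """Stand-in for the nn.Linear subclass used by Falcon's modeling code."""


white_list = {nn.Linear, nn.Conv2d}
model = nn.Sequential(FalconLinear(4, 4))

for op_name, child in dict(model.named_modules()).items():
    exact_match = type(child) in white_list                # old check: misses the subclass
    subclass_match = isinstance(child, tuple(white_list))  # new check: matches it
    print(op_name or "<root>", type(child).__name__, exact_match, subclass_match)
# The FalconLinear child is False under the old check but True under the new one,
# so it now lands in quantizable_ops.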
