Skip to content

Commit

Permalink
Move verify_marlin_supported to GPTQMarlinLinearMethod (vllm-project#…
Browse files (browse the repository at this point in the history)
  • Loading branch information
mgoin authored Sep 5, 2024
1 parent 9da25a8 commit 2ee4528
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions vllm/model_executor/layers/quantization/gptq_marlin.py
Original file line number | Diff line number | Diff line change
@@ -51,10 +51,6 @@ def __init__(self, weight_bits: int, group_size: int, desc_act: bool,

self.quant_type = self.TYPE_MAP[(weight_bits, is_sym)]

# Verify supported on platform.
verify_marlin_supported(quant_type=self.quant_type,
group_size=self.group_size)

def __repr__(self) -> str:
return (f"GPTQMarlinConfig(quant_type={self.quant_type}, "
f"group_size={self.group_size}, "
@@ -153,6 +149,10 @@ class GPTQMarlinLinearMethod(LinearMethodBase):
def __init__(self, quant_config: GPTQMarlinConfig) -> None:
self.quant_config = quant_config

# Verify supported on platform.
verify_marlin_supported(quant_type=self.quant_config.quant_type,
group_size=self.quant_config.group_size)

def create_weights(
self,
layer: torch.nn.Module,
Expand Down

0 comments on commit 2ee4528

Please sign in to comment.