exl2 phi does not use packed QKV/gate-up projections
danieldk committed Jun 26, 2024
1 parent be2d380 commit 7947c34
Showing 1 changed file with 2 additions and 2 deletions.
@@ -60,7 +60,7 @@ def load_attention(config, prefix, weights, layer_id):
     sizes = None
     prefixes = None
 
-    if config.model_type == "phi3":
+    if config.model_type == "phi3" and config.quantize != "exl2":
         prefix = f"{prefix}.qkv_proj"
         base_layer = TensorParallelColumnLinear.load_qkv(
             config,
@@ -246,7 +246,7 @@ def __init__(self, prefix, config, weights, index):
 
         # Fuse gate and up proj
         bias = getattr(config, "mlp_bias", False)
-        if config.model_type == "phi3":
+        if config.model_type == "phi3" and config.quantize != "exl2":
            gate_up_proj = TensorParallelColumnLinear.load_gate_up(
                config,
                prefix=f"{prefix}.gate_up_proj",
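The same guard appears in both hunks: the packed qkv_proj and gate_up_proj loaders are only used for Phi-3 when the checkpoint is not exl2-quantized; otherwise the loader falls back to the separate per-projection tensors. Below is a minimal, self-contained sketch of that dispatch; FakeConfig, the helper names, and the exact tensor prefixes are illustrative assumptions, not the repository's API.

from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeConfig:
    # Hypothetical stand-in for the model config used in the diff.
    model_type: str
    quantize: Optional[str] = None

def qkv_prefixes(config: FakeConfig, prefix: str) -> list[str]:
    # Packed path: non-exl2 Phi-3 checkpoints store a single fused qkv_proj tensor.
    if config.model_type == "phi3" and config.quantize != "exl2":
        return [f"{prefix}.qkv_proj"]
    # Separate path (what exl2 checkpoints need): q, k and v live in their
    # own tensors and are concatenated after loading.
    return [f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"]

def gate_up_prefixes(config: FakeConfig, prefix: str) -> list[str]:
    # Same pattern for the MLP: fused gate_up_proj unless the checkpoint is exl2.
    if config.model_type == "phi3" and config.quantize != "exl2":
        return [f"{prefix}.gate_up_proj"]
    return [f"{prefix}.gate_proj", f"{prefix}.up_proj"]

if __name__ == "__main__":
    layer = "model.layers.0"
    print(qkv_prefixes(FakeConfig("phi3", "exl2"), f"{layer}.self_attn"))
    # ['model.layers.0.self_attn.q_proj', '...k_proj', '...v_proj']
    print(qkv_prefixes(FakeConfig("phi3"), f"{layer}.self_attn"))
    # ['model.layers.0.self_attn.qkv_proj']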
