Skip to content

Commit

Permalink
quanto: update to latest (w/ latest lycoris-dev)
Browse files (browse the repository at this point in the history)
  • Loading branch information
bghira committed Oct 1, 2024
1 parent e615252 commit 83c13e0
Show file tree
Hide file tree
Showing 6 changed files with 4,078 additions and 19 deletions.
18 changes: 11 additions & 7 deletions helpers/training/quantisation/quanto_workarounds.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,9 +6,7 @@
import optimum
from optimum.quanto.library.extensions.cuda import ext as quanto_ext

@torch.library.custom_op(
"quanto::fp8_marlin_gemm", mutates_args=(), device_types=["cuda"]
)
@torch.library.impl("quanto::gemm_f16f8_marlin", ["CUDA"])
def fp8_marlin_gemm(
a: torch.Tensor,
b_q_weight: torch.Tensor,
Expand All @@ -35,7 +33,9 @@ def fp8_marlin_gemm(

optimum.quanto.library.extensions.cuda.fp8_marlin_gemm = fp8_marlin_gemm

class TinyGemmQBitsLinearFunction(optimum.quanto.tensor.function.QuantizedLinearFunction):
class TinyGemmQBitsLinearFunction(
optimum.quanto.tensor.function.QuantizedLinearFunction
):
@staticmethod
def forward(ctx, input, other, bias):
ctx.save_for_backward(input, other)
Expand All @@ -45,12 +45,16 @@ def forward(ctx, input, other, bias):
out_features = other.shape[0]
output_shape = input.shape[:-1] + (out_features,)
output = torch._weight_int4pack_mm(
input.view(-1, in_features).to(dtype=other.dtype), other._data._data, other._group_size, other._scale_shift
input.view(-1, in_features).to(dtype=other.dtype),
other._data._data,
other._group_size,
other._scale_shift,
)
output = output.view(output_shape)
if bias is not None:
output = output + bias
return output

from optimum.quanto.tensor.weights import tinygemm
tinygemm.qbits.TinyGemmQBitsLinearFunction = TinyGemmQBitsLinearFunction

tinygemm.qbits.TinyGemmQBitsLinearFunction = TinyGemmQBitsLinearFunction
8 changes: 4 additions & 4 deletions install/apple/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 83c13e0

Please sign in to comment.