From 665c6ebfc8cfbb334a43fc95e2a040774ddb6253 Mon Sep 17 00:00:00 2001 From: alexm Date: Wed, 15 May 2024 12:01:54 -0400 Subject: [PATCH] Sync with Rob's changes --- benchmarks/kernels/benchmark_marlin.py | 1 + vllm/model_executor/layers/quantization/utils/marlin_utils.py | 1 + vllm/model_executor/layers/quantization/utils/quant_utils.py | 1 + 3 files changed, 3 insertions(+) diff --git a/benchmarks/kernels/benchmark_marlin.py b/benchmarks/kernels/benchmark_marlin.py index 0b1486840cff8..313d11ec0bf65 100644 --- a/benchmarks/kernels/benchmark_marlin.py +++ b/benchmarks/kernels/benchmark_marlin.py @@ -22,6 +22,7 @@ def bench_run(results, model, act_order, is_k_full, num_bits, group_size, size_m, size_k, size_n): label = "Quant Matmul" + sub_label = ("{}, act={} k_full={}, b={}, g={}, " "MKN=({}x{}x{})".format(model, act_order, is_k_full, num_bits, group_size, size_m, size_k, size_n)) diff --git a/vllm/model_executor/layers/quantization/utils/marlin_utils.py b/vllm/model_executor/layers/quantization/utils/marlin_utils.py index e6dd336f4ba16..33b3169983475 100644 --- a/vllm/model_executor/layers/quantization/utils/marlin_utils.py +++ b/vllm/model_executor/layers/quantization/utils/marlin_utils.py @@ -1,3 +1,4 @@ +"""This file is used for /tests and /benchmarks""" import numpy import torch diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py index 28888b1a0691c..177cb23f63cf4 100644 --- a/vllm/model_executor/layers/quantization/utils/quant_utils.py +++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py @@ -1,3 +1,4 @@ +"""This file is used for /tests and /benchmarks""" import numpy import torch