Commit: format code
yufenglee committed Oct 9, 2023
1 parent 1b2d7ee commit 3b59970
Showing 8 changed files with 35 additions and 25 deletions.
onnxruntime/contrib_ops/cpu/quantization/dequantize_blockwise.h

@@ -58,7 +58,7 @@ void QuantizeBlockwise(
zero_points[zp_idx] = ((zero_points_tmp[zp_idx * 2]) | (zero_points_tmp[zp_idx * 2 + 1] << 4));
}
if (total_block_count & 1) {
-      zero_points[total_block_count / 2] = (zero_points[total_block_count / 2] &0xf0) | zero_points_tmp[total_block_count - 1];
+      zero_points[total_block_count / 2] = (zero_points[total_block_count / 2] & 0xf0) | zero_points_tmp[total_block_count - 1];

[GitHub Actions / cpplint warning] onnxruntime/contrib_ops/cpu/quantization/dequantize_blockwise.h:61: Lines should be <= 120 characters long [whitespace/line_length] [2]
}
}
}
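Note: the change above is whitespace-only, but the surrounding loop is worth spelling out: two 4-bit zero points are packed per byte (even index in the low nibble, odd index in the high nibble), and an odd block count leaves the last value alone in the low nibble of the final byte. A minimal NumPy sketch of that scheme (names are illustrative, not from the source):

```python
import numpy as np

def pack_zero_points(zp_unpacked: np.ndarray) -> np.ndarray:
    """Pack 4-bit zero points two-per-byte: even index -> low nibble,
    odd index -> high nibble, mirroring the C++ loop above."""
    n = zp_unpacked.size
    packed = np.zeros((n + 1) // 2, dtype=np.uint8)
    packed[: n // 2] = zp_unpacked[0 : n - n % 2 : 2] | (zp_unpacked[1::2] << 4)
    if n & 1:  # odd count: last value goes in the low nibble of the final byte
        packed[-1] = (packed[-1] & 0xF0) | zp_unpacked[-1]
    return packed

zp = np.array([1, 2, 3, 4, 5], dtype=np.uint8)
print(pack_zero_points(zp))  # [33 67  5] == [0x21, 0x43, 0x05]
```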
4 changes: 2 additions & 2 deletions onnxruntime/python/onnxruntime_pybind_quant.cc
@@ -10,7 +10,7 @@

namespace pybind11 {
namespace detail {
-// python3 -c 'import numpy as np; print(np.dtype(np.float16).num)'
+// python3 -c 'import numpy as np; print(np.dtype(np.float16).num)'
constexpr int NPY_FLOAT16 = 23;
template <>
struct npy_format_descriptor<onnxruntime::MLFloat16> {
@@ -19,7 +19,7 @@ struct npy_format_descriptor<onnxruntime::MLFloat16> {
handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16);
return reinterpret_borrow<pybind11::dtype>(ptr);
}
-  static std::string format() {
+  static std::string format() {
// following: https://docs.python.org/3/library/struct.html#format-characters
return "e";
}
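The comment and the format() return value above encode two small facts worth verifying: NumPy's type number for float16 is 23, and "e" is the struct format character for IEEE 754 half precision. A quick check, assuming only NumPy is installed:

```python
import struct
import numpy as np

# NumPy's internal type number for float16 -- the NPY_FLOAT16 constant above.
print(np.dtype(np.float16).num)  # 23

# "e" packs an IEEE 754 half-precision float into 2 bytes,
# which is why format() returns "e" for MLFloat16.
raw = struct.pack("e", 1.5)
print(np.frombuffer(raw, dtype=np.float16)[0])  # 1.5
```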
@@ -34,8 +34,8 @@ def report(self):
def profile_dequantize_int4_func(n, k, dtype, func):
np.random.seed(0)
output = np.random.rand(n, k).astype(dtype)
-    quant = np.random.randint(low=0, high=127, size=(n, (k+31)//32, 16)).astype('uint8')
-    scales = np.random.rand(n, (k+31)//32).astype(dtype)
+    quant = np.random.randint(low=0, high=127, size=(n, (k + 31) // 32, 16)).astype("uint8")
+    scales = np.random.rand(n, (k + 31) // 32).astype(dtype)

output_d = ke.DeviceArray(output)
quant_d = ke.DeviceArray(quant)
@@ -55,11 +55,10 @@ def profile_with_args(n, k, dtype, sort):


def profile():
-    dims = [4096, 12288,]
for dt in dtypes:
-        for n,k in ((4096, 4096), (4096, 12288), (12288, 4096)):
-            profile_with_args(n, k, dt, True)
-            print()
+        for n, k in ((4096, 4096), (4096, 12288), (12288, 4096)):
+            profile_with_args(n, k, dt, True)
+            print()


if __name__ == "__main__":
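The shapes in profile_dequantize_int4_func follow from the blockwise int4 layout: each 16-byte uint8 block holds 32 4-bit values, so a row of k weights needs (k + 31) // 32 blocks, with one scale per block. A hedged reference sketch of the corresponding dequantization (the nibble order and the implied zero point of 8 are assumptions; the real kernel also supports explicit per-block zero points):

```python
import numpy as np

def dequantize_int4_blocks(quant, scales):
    """quant: (n, k_blocks, 16) uint8 -- 32 4-bit values per 16-byte block.
    scales: (n, k_blocks). Returns (n, k_blocks * 32) floats."""
    low = (quant & 0x0F).astype(np.int8)          # even-indexed values (assumed)
    high = ((quant >> 4) & 0x0F).astype(np.int8)  # odd-indexed values (assumed)
    vals = np.stack([low, high], axis=-1).reshape(quant.shape[0], quant.shape[1], 32)
    # symmetric-style dequant with an implied zero point of 8 (an assumption)
    return ((vals - 8) * scales[..., None]).reshape(quant.shape[0], -1)

n, k = 4, 64
quant = np.random.randint(0, 256, size=(n, (k + 31) // 32, 16), dtype=np.uint8)
scales = np.random.rand(n, (k + 31) // 32).astype(np.float32)
print(dequantize_int4_blocks(quant, scales).shape)  # (4, 64)
```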
22 changes: 11 additions & 11 deletions onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py
@@ -5,30 +5,30 @@
# --------------------------------------------------------------------------

import argparse
-import struct
-from pathlib import Path
-from typing import List, Tuple
-
+import logging
+import os
+from typing import List, Tuple
+
+import coloredlogs
import numpy as np
import numpy.typing as npt
import onnx
from onnx.onnx_pb import GraphProto, ModelProto, NodeProto, TensorProto
-import os
-
-from .onnx_model import ONNXModel
-from .quant_utils import attribute_to_kwarg, load_model_with_shape_infer
-import coloredlogs
from onnxruntime.capi._pybind_state import quantize_matmul_4bits
+
+from .onnx_model import ONNXModel
+from .quant_utils import attribute_to_kwarg

logger = logging.getLogger(__name__)


class MatMul4BitsQuantizer:
"""Perform 4b quantization of constant MatMul weights"""

-    def __init__(self, model: ModelProto, block_size: int, is_symmetric: bool, nodes_to_exclude=[]):
+    def __init__(self, model: ModelProto, block_size: int, is_symmetric: bool, nodes_to_exclude=None):
+        if nodes_to_exclude is None:
+            nodes_to_exclude = []
self.model = ONNXModel(model)
self.block_size = block_size
self.is_symmetric = is_symmetric
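The nodes_to_exclude change fixes a classic Python pitfall rather than formatting: a mutable default like [] is created once at function definition time and shared across every call. A minimal demonstration of the bug and the pattern the diff adopts:

```python
def bad(item, acc=[]):       # one shared list for every call
    acc.append(item)
    return acc

def good(item, acc=None):    # the pattern the diff adopts
    if acc is None:
        acc = []
    acc.append(item)
    return acc

print(bad(1), bad(2))    # [1, 2] [1, 2] -- state leaks between calls
print(good(1), good(2))  # [1] [2]
```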
@@ -97,13 +97,13 @@ def _q4_matmul_node_weight(self, node: NodeProto, graph_stack: List[GraphProto])
Bs_graph.input.remove(input)
break

-        scales_tensor = onnx.numpy_helper.from_array(scales)  # noqa: N806
+        scales_tensor = onnx.numpy_helper.from_array(scales)
scales_tensor.name = B.name + "_scales"
Bs_graph.initializer.extend([B_quant, scales_tensor])

input_names = [node.input[0], B_quant.name, scales_tensor.name]
if not self.is_symmetric:
-            zp_tensor = onnx.numpy_helper.from_array(zero_points)  # noqa: N806
+            zp_tensor = onnx.numpy_helper.from_array(zero_points)
zp_tensor.name = B.name + "_zero_points"
Bs_graph.initializer.extend([zp_tensor])
input_names.append(zp_tensor.name)
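For context, _q4_matmul_node_weight attaches the packed weight, the per-block scales, and (in the asymmetric case) the zero points as graph initializers. A rough sketch of how per-block asymmetric 4-bit parameters can be derived for one block of weights (a simplification for illustration; the actual rounding and packing live in quantize_matmul_4bits):

```python
import numpy as np

def block_quant_params(w: np.ndarray):
    """Per-block scale and zero point for asymmetric 4-bit quantization
    of a 1-D block of weights (simplified; the real kernel packs nibbles)."""
    wmin = min(float(w.min()), 0.0)
    wmax = max(float(w.max()), 0.0)
    scale = (wmax - wmin) / 15.0 or 1.0   # 16 levels; avoid divide-by-zero
    zp = int(round(-wmin / scale))        # integer level that maps back to 0.0
    q = np.clip(np.round(w / scale) + zp, 0, 15).astype(np.uint8)
    return q, scale, zp

w = np.random.randn(32).astype(np.float32)
q, scale, zp = block_quant_params(w)
print(np.abs((q.astype(np.float32) - zp) * scale - w).max())  # small error
```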
9 changes: 9 additions & 0 deletions onnxruntime/python/tools/transformers/llama2_export.py
@@ -0,0 +1,9 @@
+from optimum.onnxruntime import ORTModelForCausalLM
+
+name = "meta-llama/Llama-2-7b-hf"
+model = ORTModelForCausalLM.from_pretrained(
+    name,
+    export=True,
+    use_auth_token=True,
+)
+model.save_pretrained(name.split("/")[-1] + "-onnx")
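The new llama2_export.py script downloads the gated Llama-2 weights, so use_auth_token=True presumes a prior huggingface-cli login. A hedged example of loading the exported directory back for inference (assumes transformers is installed alongside optimum):

```python
from optimum.onnxruntime import ORTModelForCausalLM
from transformers import AutoTokenizer

# Directory written by model.save_pretrained(...) in the script above.
model = ORTModelForCausalLM.from_pretrained("Llama-2-7b-hf-onnx")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", use_auth_token=True)

inputs = tokenizer("Hello, my name is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```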
@@ -75,7 +75,7 @@ void RunTest(int64_t M, int64_t N, int64_t K, int64_t block_size, bool has_zerop
int64_t buf_size = number_of_block * (block_size * 4 / 8);
std::vector<uint8_t> input1_vals(buf_size);
std::vector<float> scales(number_of_block);
-  std::vector<uint8_t> zp((N *block_per_k + 1) / 2);
+  std::vector<uint8_t> zp((N * block_per_k + 1) / 2);

QuantizeDequantize(input1_f_vals, input1_vals, scales, has_zeropoint ? &zp : nullptr, N, K, block_size);

9 changes: 5 additions & 4 deletions onnxruntime/test/python/quantization/test_op_matmul_4bits.py
@@ -21,12 +21,11 @@
class TestOpMatMul4Bits(unittest.TestCase):
@classmethod
def setUpClass(cls):
-        #cls._tmp_model_dir = tempfile.TemporaryDirectory(prefix="test_matmulfpq4.")
-        cls._tmp_model_dir = Path("test_models")
+        cls._tmp_model_dir = tempfile.TemporaryDirectory(prefix="test_matmulfpq4.")

@classmethod
def tearDownClass(cls):
-        #cls._tmp_model_dir.cleanup()
+        cls._tmp_model_dir.cleanup()
pass

[Code scanning / CodeQL warning] Unnecessary 'pass' statement.

def fill_int4_data(self, shape: Union[int, Tuple[int, ...]], symmetric: bool) -> np.ndarray:
@@ -114,7 +113,9 @@ def quant_test(
block_size: int,
is_symmetric: bool,
):
-        model_int4_path = str(Path(self._tmp_model_dir.name).joinpath(f"MatMulNBits_{block_size}_{is_symmetric}.onnx").absolute())
+        model_int4_path = str(
+            Path(self._tmp_model_dir.name).joinpath(f"MatMulNBits_{block_size}_{is_symmetric}.onnx").absolute()
+        )

# Quantize fp32 model to int4 model
model = quant_utils.load_model_with_shape_infer(Path(model_fp32_path))
@@ -6,6 +6,7 @@
# --------------------------------------------------------------------------

import unittest
+
import numpy as np
import numpy.typing as npt

