Remove UT of onnxrt woq tune for large model to save CI time (#1548)
Signed-off-by: yuwenzho <[email protected]>
yuwenzho authored Jan 18, 2024
1 parent 844a032 commit 3d6975b
Showing 1 changed file with 0 additions and 24 deletions.
test/adaptor/onnxrt_adaptor/test_weight_only_adaptor.py (0 additions, 24 deletions)
@@ -59,23 +59,11 @@ def setUpClass(self):
        self.gptj_fp16_model = onnx.load("gptj_fp16/decoder_model.onnx")
        self.gptj_dataloader = DummyNLPDataloader("hf-internal-testing/tiny-random-gptj")

        cmd = (
            "optimum-cli export onnx --model PY007/TinyLlama-1.1B-Chat-v0.3 --task text-generation --legacy tiny-llama/"
        )
        p = subprocess.Popen(
            cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
        )  # nosec
        p.communicate()

        self.llama_model = "tiny-llama/decoder_model.onnx"
        self.llama_dataloader = DummyNLPDataloader("PY007/TinyLlama-1.1B-Chat-v0.3")

    @classmethod
    def tearDownClass(self):
        shutil.rmtree("nc_workspace", ignore_errors=True)
        shutil.rmtree("gptj", ignore_errors=True)
        shutil.rmtree("gptj_fp16", ignore_errors=True)
        shutil.rmtree("tiny-llama", ignore_errors=True)

    @unittest.skipIf("CUDAExecutionProvider" not in ort.get_available_providers(), "Skip cuda woq test")
    def test_RTN_quant_with_woq_op(self):
@@ -478,18 +466,6 @@ def fake_eval(model, eval_result_lst):
        )
        self.assertEqual(self._count_woq_matmul(woq_model, bits=8), 31)

    def test_woq_tune_with_large_model(self):
        from functools import partial

        def fake_eval(model, eval_result_lst):
            acc = eval_result_lst.pop(0)
            return acc

        # Expect tuning ends with WOQ algorithm 'RTN_G32ASYM'
        partial_fake_eval = partial(fake_eval, eval_result_lst=[1, 1.1])
        woq_model = self._test_woq_tune_common(self.llama_model, self.llama_dataloader, partial_fake_eval)
        self.assertEqual(self._count_woq_matmul(woq_model), 155)

    def test_woq_with_ModelProto_input(self):
        from neural_compressor.model.onnx_model import ONNXModel

