From 7b8a2675daccdfa13990caf1d79fb7f9bfe2b04d Mon Sep 17 00:00:00 2001
From: yuwenzho
Date: Thu, 4 Jan 2024 18:59:24 +0800
Subject: [PATCH] Fix weight loading of large model in INC quantization pass
 (#857)

## Describe your changes

Fix weight loading of large model in INC quantization pass. Reload weights
for models > 2GB to prevent missing weight files.

## Checklist before requesting a review

- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this change
  to be included in the release notes.

## (Optional) Issue link

#852

---------

Signed-off-by: yuwenzho
---
 olive/passes/onnx/inc_quantization.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/olive/passes/onnx/inc_quantization.py b/olive/passes/onnx/inc_quantization.py
index f0d4c56e5..a4e4158e2 100644
--- a/olive/passes/onnx/inc_quantization.py
+++ b/olive/passes/onnx/inc_quantization.py
@@ -562,6 +562,13 @@ def _run_for_config(
                 "find any quantized model which meet accuracy goal. "
                 "Try to increase 'max_trials' in 'tuning_criterion'."
             )
+
+        # reload weight for model with size > 2GB to prevent error of missing weight files
+        if q_model.is_large_model:
+            from onnx.external_data_helper import load_external_data_for_model
+
+            load_external_data_for_model(q_model.model, os.path.dirname(q_model._model_path))
+
         # save the model to the output path and return the model
         return model_proto_to_olive_model(q_model.model, output_model_path, config)