From 7b8a2675daccdfa13990caf1d79fb7f9bfe2b04d Mon Sep 17 00:00:00 2001
From: yuwenzho
Date: Thu, 4 Jan 2024 18:59:24 +0800
Subject: [PATCH] Fix weight loading of large model in INC quantization pass
 (#857)

## Describe your changes

Fix weight loading of large model in INC quantization pass. Reload weights
for models > 2GB to prevent missing weight files.

## Checklist before requesting a review

- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this change
  to be included in the release notes.

## (Optional) Issue link

#852

---------

Signed-off-by: yuwenzho
---
 olive/passes/onnx/inc_quantization.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/olive/passes/onnx/inc_quantization.py b/olive/passes/onnx/inc_quantization.py
index f0d4c56e5..a4e4158e2 100644
--- a/olive/passes/onnx/inc_quantization.py
+++ b/olive/passes/onnx/inc_quantization.py
@@ -562,6 +562,13 @@ def _run_for_config(
                 "find any quantized model which meet accuracy goal. "
                 "Try to increase 'max_trials' in 'tuning_criterion'."
             )
+
+        # reload weight for model with size > 2GB to prevent error of missing weight files
+        if q_model.is_large_model:
+            from onnx.external_data_helper import load_external_data_for_model
+
+            load_external_data_for_model(q_model.model, os.path.dirname(q_model._model_path))
+
         # save the model to the output path and return the model
         return model_proto_to_olive_model(q_model.model, output_model_path, config)