Added release_memory

openvinotoolkit · Sep 9, 2024 · e04e7d1
1 parent 3d54344
commit e04e7d1
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 14 deletions.
diff --git a/nncf/openvino/quantization/compression_primitives.py b/nncf/openvino/quantization/compression_primitives.py
@@ -272,8 +272,8 @@ def _get_compress_model(
 
         compiled_model = ov.compile_model(model, device_name="CPU")
 
-        SHARE_OUTPUTS = bool(int(os.environ.get("SHARE_OUTPUTS", "0")))
-        return compiled_model, lambda parameters: compiled_model(parameters, share_outputs=SHARE_OUTPUTS)
+        NOT_SHARED_OUTPUTS = bool(int(os.environ.get("NOT_SHARED_OUTPUTS", "0")))
+        return compiled_model, lambda parameters: compiled_model(parameters, share_outputs=not NOT_SHARED_OUTPUTS)
 
     @staticmethod
     def _get_compress_decompress_model(

diff --git a/nncf/quantization/algorithms/weight_compression/weight_lowering.py b/nncf/quantization/algorithms/weight_compression/weight_lowering.py
@@ -370,6 +370,10 @@ def calculate_quantized_weight(
                 compressed_weights_ov, scale_ov, zero_point_ov = results
             else:
                 compressed_weights_ov, scale_ov = results
+
+        RELEASE_MEMORY = bool(int(os.environ.get("RELEASE_MEMORY", "0")))
+        if RELEASE_MEMORY:
+            compiled_model.release_memory()
     if not ov_compression or COMPARE_WITH_NUMPY:
         if weight.dtype != TensorDataType.float32:
             weight = weight.astype(TensorDataType.float32)

diff --git a/weight_compression.py b/weight_compression.py
@@ -40,20 +40,20 @@ def parse_arguments():
 
     parser.add_argument("--input-dtype", type=str, choices=["fp32", "fp16", "bf16"], default="fp32", help="OV model input dtype")
 
-    parser.add_argument("--bf16-input", action="store_true", help="Enable BF16 input mode")
-
-    parser.add_argument("--int8-output", action="store_true", help="Output in int8")
+    parser.add_argument("--int8-output", action="store_true", help="Output in (u)int8")
 
     parser.add_argument("--recompile", action="store_true", help="Recompile model every time")
 
-    parser.add_argument("--share-outputs", action="store_true", help="Share outputs")
+    parser.add_argument("--not-shared-outputs", action="store_true", help="Do not share outputs")
 
     parser.add_argument("--save-model", action="store_true", help="Save compressed model")
 
     parser.add_argument("--compare-with-numpy", action="store_true", help="Compare compressed weight with the one computed with NumPy")
 
     parser.add_argument("--invert-numpy-division", action="store_true", help="Invert division when compressing with NumPy")
 
+    parser.add_argument("--release-memory", action="store_true", help="Release memory")
+
     return parser.parse_args()
 
 
@@ -73,10 +73,11 @@ def main(args):
     input_dtype = args.input_dtype
     int8_output = args.int8_output
     recompile = args.recompile
-    share_outputs = args.share_outputs
+    not_shared_outputs = args.not_shared_outputs
     save_model = args.save_model
     compare_with_numpy = args.compare_with_numpy
     invert_numpy_division = args.invert_numpy_division
+    release_memory = args.release_memory
     if numpy_compression:
         log_dir_suffix = "numpy"
         if invert_numpy_division:
@@ -88,8 +89,8 @@ def main(args):
         log_dir_suffix = f"{log_dir_suffix}_{f'input-{input_dtype}'}"
         if recompile:
             log_dir_suffix = f"{log_dir_suffix}_recompile"
-        if share_outputs:
-            log_dir_suffix = f"{log_dir_suffix}_share-outputs"
+        if not_shared_outputs:
+            log_dir_suffix = f"{log_dir_suffix}_not-shared-outputs"
 
     memory_monitors = []
     for memory_type, mem_from_zero in [(MemoryType.RSS, False), (MemoryType.SYSTEM, False), (MemoryType.SYSTEM, True)]:
@@ -107,9 +108,10 @@ def main(args):
     os.environ["INPUT_DTYPE"] = input_dtype
     os.environ["INT8_OUTPUT"] = f"{int(int8_output)}"
     os.environ["RECOMPILE"] = f"{int(recompile)}"
-    os.environ["SHARE_OUTPUTS"] = f"{int(share_outputs)}"
+    os.environ["NOT_SHARED_OUTPUTS"] = f"{int(not_shared_outputs)}"
     os.environ["COMPARE_WITH_NUMPY"] = f"{int(compare_with_numpy)}"
     os.environ["INVERT_NUMPY_DIVISION"] = f"{int(invert_numpy_division)}"
+    os.environ["RELEASE_MEMORY"] = f"{int(release_memory)}"
 
     start_time = time.perf_counter()
     compressed_model = nncf.compress_weights(model, mode=nncf.CompressWeightsMode.INT8_ASYM)
@@ -155,9 +157,11 @@ def main(args):
         if not csv_exists:
             f.write(
                 "Model Path,"
-                "Numpy,"
-                "Submodel Type,"
-                "Input,Output,"
+                "Backend,"
+                "End-to-end,"
+                "Input Shapes,"
+                "Input,"
+                "Output,"
                 "Compression Time,"
                 "Peak Memory,"
                 "Cache Size,"
@@ -166,7 +170,8 @@ def main(args):
             )
         f.write(
             f"{model_path},"
-            f"{numpy_compression},"
+            f"{'NumPy' if numpy_compression else 'OV'},"
+            f"{end_to_end_compression},"
             f"{'-' if numpy_compression else 'Dynamic' if dynamic_compression else 'Static'},"
             f"{'-' if numpy_compression else input_dtype.upper()},"
             f"{'-' if numpy_compression else 'INT8' if int8_output else 'FP32'},"