Skip to content

Commit

Permalink
Added release_memory
Browse files: browse the repository at this point in the history
  • Loading branch information
nikita-savelyevv committed Sep 9, 2024
1 parent 3d54344 commit e04e7d1
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 14 deletions.
4 changes: 2 additions & 2 deletions nncf/openvino/quantization/compression_primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,8 @@ def _get_compress_model(

compiled_model = ov.compile_model(model, device_name="CPU")

SHARE_OUTPUTS = bool(int(os.environ.get("SHARE_OUTPUTS", "0")))
return compiled_model, lambda parameters: compiled_model(parameters, share_outputs=SHARE_OUTPUTS)
NOT_SHARED_OUTPUTS = bool(int(os.environ.get("NOT_SHARED_OUTPUTS", "0")))
return compiled_model, lambda parameters: compiled_model(parameters, share_outputs=not NOT_SHARED_OUTPUTS)

@staticmethod
def _get_compress_decompress_model(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,10 @@ def calculate_quantized_weight(
compressed_weights_ov, scale_ov, zero_point_ov = results
else:
compressed_weights_ov, scale_ov = results

RELEASE_MEMORY = bool(int(os.environ.get("RELEASE_MEMORY", "0")))
if RELEASE_MEMORY:
compiled_model.release_memory()
if not ov_compression or COMPARE_WITH_NUMPY:
if weight.dtype != TensorDataType.float32:
weight = weight.astype(TensorDataType.float32)
Expand Down
29 changes: 17 additions & 12 deletions weight_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,20 +40,20 @@ def parse_arguments():

parser.add_argument("--input-dtype", type=str, choices=["fp32", "fp16", "bf16"], default="fp32", help="OV model input dtype")

parser.add_argument("--bf16-input", action="store_true", help="Enable BF16 input mode")

parser.add_argument("--int8-output", action="store_true", help="Output in int8")
parser.add_argument("--int8-output", action="store_true", help="Output in (u)int8")

parser.add_argument("--recompile", action="store_true", help="Recompile model every time")

parser.add_argument("--share-outputs", action="store_true", help="Share outputs")
parser.add_argument("--not-shared-outputs", action="store_true", help="Do not share outputs")

parser.add_argument("--save-model", action="store_true", help="Save compressed model")

parser.add_argument("--compare-with-numpy", action="store_true", help="Compare compressed weight with the one computed with NumPy")

parser.add_argument("--invert-numpy-division", action="store_true", help="Invert division when compressing with NumPy")

parser.add_argument("--release-memory", action="store_true", help="Release memory")

return parser.parse_args()


Expand All @@ -73,10 +73,11 @@ def main(args):
input_dtype = args.input_dtype
int8_output = args.int8_output
recompile = args.recompile
share_outputs = args.share_outputs
not_shared_outputs = args.not_shared_outputs
save_model = args.save_model
compare_with_numpy = args.compare_with_numpy
invert_numpy_division = args.invert_numpy_division
release_memory = args.release_memory
if numpy_compression:
log_dir_suffix = "numpy"
if invert_numpy_division:
Expand All @@ -88,8 +89,8 @@ def main(args):
log_dir_suffix = f"{log_dir_suffix}_{f'input-{input_dtype}'}"
if recompile:
log_dir_suffix = f"{log_dir_suffix}_recompile"
if share_outputs:
log_dir_suffix = f"{log_dir_suffix}_share-outputs"
if not_shared_outputs:
log_dir_suffix = f"{log_dir_suffix}_not-shared-outputs"

memory_monitors = []
for memory_type, mem_from_zero in [(MemoryType.RSS, False), (MemoryType.SYSTEM, False), (MemoryType.SYSTEM, True)]:
Expand All @@ -107,9 +108,10 @@ def main(args):
os.environ["INPUT_DTYPE"] = input_dtype
os.environ["INT8_OUTPUT"] = f"{int(int8_output)}"
os.environ["RECOMPILE"] = f"{int(recompile)}"
os.environ["SHARE_OUTPUTS"] = f"{int(share_outputs)}"
os.environ["NOT_SHARED_OUTPUTS"] = f"{int(not_shared_outputs)}"
os.environ["COMPARE_WITH_NUMPY"] = f"{int(compare_with_numpy)}"
os.environ["INVERT_NUMPY_DIVISION"] = f"{int(invert_numpy_division)}"
os.environ["RELEASE_MEMORY"] = f"{int(release_memory)}"

start_time = time.perf_counter()
compressed_model = nncf.compress_weights(model, mode=nncf.CompressWeightsMode.INT8_ASYM)
Expand Down Expand Up @@ -155,9 +157,11 @@ def main(args):
if not csv_exists:
f.write(
"Model Path,"
"Numpy,"
"Submodel Type,"
"Input,Output,"
"Backend,"
"End-to-end,"
"Input Shapes,"
"Input,"
"Output,"
"Compression Time,"
"Peak Memory,"
"Cache Size,"
Expand All @@ -166,7 +170,8 @@ def main(args):
)
f.write(
f"{model_path},"
f"{numpy_compression},"
f"{'NumPy' if numpy_compression else 'OV'},"
f"{end_to_end_compression},"
f"{'-' if numpy_compression else 'Dynamic' if dynamic_compression else 'Static'},"
f"{'-' if numpy_compression else input_dtype.upper()},"
f"{'-' if numpy_compression else 'INT8' if int8_output else 'FP32'},"
Expand Down

0 comments on commit e04e7d1

Please sign in to comment.