From c9569bbe1ea9416cdd015a1ecc79ffc6cfea149f Mon Sep 17 00:00:00 2001
From: Nikita Savelyev
Date: Thu, 26 Sep 2024 10:16:11 +0200
Subject: [PATCH] INT4 experiments

---
 nncf/openvino/graph/node_utils.py          |   3 +-
 .../quantization/compression_primitives.py |   2 +
 .../weight_compression/openvino_backend.py |  10 +
 .../weight_compression/weight_lowering.py  |  14 +-
 run_weight_compression.py                  | 615 +++++++++---------
 5 files changed, 338 insertions(+), 306 deletions(-)

diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py
index 2c28214849e..13700bbac29 100644
--- a/nncf/openvino/graph/node_utils.py
+++ b/nncf/openvino/graph/node_utils.py
@@ -116,7 +116,8 @@ def get_const_value(const_node: ov.Node) -> np.ndarray:
     :return: The constant value.
     """
     INPUT_DTYPE = os.environ.get("INPUT_DTYPE", "fp32")
-    if const_node.get_element_type() == ov.Type.bf16 and INPUT_DTYPE != "bf16":
+    NUMPY_COMPRESSION = bool(int(os.environ.get("NUMPY_COMPRESSION", "0")))
+    if const_node.get_element_type() == ov.Type.bf16 and (INPUT_DTYPE != "bf16" or NUMPY_COMPRESSION):
         # Fixed FP32 data type as the result for BF16 constant
         return const_node.get_data(dtype=np.float32)
     return const_node.data
diff --git a/nncf/openvino/quantization/compression_primitives.py b/nncf/openvino/quantization/compression_primitives.py
index 8930b7de89b..da29151ee31 100644
--- a/nncf/openvino/quantization/compression_primitives.py
+++ b/nncf/openvino/quantization/compression_primitives.py
@@ -264,11 +264,13 @@ def _get_compress_model(
         num_bits = config.num_bits
         if config.mode in [CompressWeightsMode.INT8_ASYM, config.mode.INT4_ASYM]:
             dtype = ov.Type.u8
+            # dtype = ov.Type.u8 if config.mode == CompressWeightsMode.INT8_ASYM else ov.Type.u4
             level_low = 0
             level_high = 2**num_bits - 1
             compressed_w += opset.convert(zp, ov.Type.f32)
         elif config.mode in [CompressWeightsMode.INT8_SYM, config.mode.INT4_SYM]:
             dtype = ov.Type.i8
+            # dtype = ov.Type.i8 if config.mode == CompressWeightsMode.INT8_SYM else ov.Type.u4
             level_low = -(2 ** (num_bits - 1))
             level_high = 2 ** (num_bits - 1) - 1
         else:
diff --git a/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/nncf/quantization/algorithms/weight_compression/openvino_backend.py
index e831cd27513..75d56d155a4 100644
--- a/nncf/quantization/algorithms/weight_compression/openvino_backend.py
+++ b/nncf/quantization/algorithms/weight_compression/openvino_backend.py
@@ -8,6 +8,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 from typing import Dict, Iterable, List, Optional, Tuple

 import openvino as ov
@@ -226,6 +227,15 @@ def _create_compression_subgraph(
         original_shape = weight.shape
         compressed_weight = compress_weight(weight, reduction_axes, compression_config, layer_scales, layer_zero_points)

+        # NUMPY_COMPRESSION = bool(int(os.environ.get("NUMPY_COMPRESSION", "0")))
+        # FP32_OUTPUT = bool(int(os.environ.get("FP32_OUTPUT", "0")))
+        # if compression_config.mode in [CompressWeightsMode.INT4_ASYM, CompressWeightsMode.INT4_SYM] and not NUMPY_COMPRESSION and not FP32_OUTPUT:
+        #     flat_compressed_weight = ov.Tensor(compressed_weight.tensor.data, (compressed_weight.tensor.shape[0]*2, ), compression_dtype)
+        #     compressed_const = opset.constant(flat_compressed_weight)
+        #     compressed_shape = compressed_weight.scale.shape[:2] + (compression_config.group_size,)
+        #     compressed_const = opset.reshape(compressed_const, compressed_shape)
+        # else:
+        #     compressed_const = opset.constant(compressed_weight.tensor.data, dtype=compression_dtype, name=const_node_name)
         compressed_const = opset.constant(compressed_weight.tensor.data, dtype=compression_dtype, name=const_node_name)
         converted_const = opset.convert(compressed_const, ov.Type.f16)
         if compressed_weight.zero_point is not None and compressed_weight.tensor.dtype == TensorDataType.uint8:
diff --git a/nncf/quantization/algorithms/weight_compression/weight_lowering.py b/nncf/quantization/algorithms/weight_compression/weight_lowering.py
index f1711563076..bc0711e6e2b 100644
--- a/nncf/quantization/algorithms/weight_compression/weight_lowering.py
+++ b/nncf/quantization/algorithms/weight_compression/weight_lowering.py
@@ -350,15 +350,27 @@ def calculate_quantized_weight(
     ov_compression = weight.backend in [TensorBackend.numpy, TensorBackend.ov] and is_openvino_available() and not NUMPY_COMPRESSION
     compressed_weights_ov, scale_ov, zero_point_ov = None, None, None
     if ov_compression:
+        import openvino as ov
         from nncf.openvino.quantization.compression_primitives import OV_COMPRESSION_PRIMITIVE_CACHE

+        # if INPUT_DTYPE == "bf16":
+        #     assert weight.dtype == TensorDataType.float16
+        #     input_dtype = ov.Type.bf16
+        # elif weight.dtype == TensorDataType.float16:
+        #     input_dtype = ov.Type.f16
+        # elif weight.dtype == TensorDataType.float32:
+        #     input_dtype = ov.Type.f32
+        # else:
+        #     raise Exception
+        # input_tensors = (ov.Tensor(weight.data, weight.data.shape, input_dtype),)
+
         if INPUT_DTYPE == "bf16":
-            import openvino as ov
             assert weight.data.dtype == np.float16
             weight_data = ov.Tensor(weight.data, weight.data.shape, ov.Type.bf16)
         else:
             weight_data = weight.data
         input_tensors = (weight_data,)
+
         if not END_TO_END_COMPRESSION:
             zero_point_shape = None if zero_point is None else zero_point.shape
             compiled_model, compress_weight_primitive = OV_COMPRESSION_PRIMITIVE_CACHE.get_compress_weight_primitive(
diff --git a/run_weight_compression.py b/run_weight_compression.py
index d365c8ea2fc..09c3a36b918 100644
--- a/run_weight_compression.py
+++ b/run_weight_compression.py
@@ -16,310 +16,317 @@ def stream_handler(stream, target_file):
 parent_log_dir = Path("compression_logs")

 experiment_params = [
-    (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"),
-    (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"),
-    (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end
--recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / 
"recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), - - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), - (parent_model_dir / 
"phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), - (parent_model_dir / 
"phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), - - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), - 
(parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", 
parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), - - - - - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym "), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym "), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --share-outputs"), - 
(parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), - (parent_model_dir / 
"tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym "), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym "), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile --share-outputs"), - (parent_model_dir / 
"phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", 
"--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym "), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym "), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode 
int4_asym --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - - - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / 
"optimal_configurations/int8/tiny-llama", "--save-model --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), - - - - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / 
"optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / 
"recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), + # 
(parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / 
"recompile_vs_release-memory/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / 
"recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / 
"recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # + # + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym 
--numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym "), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym "), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", 
parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym "), + # (parent_model_dir / 
"phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym "), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym 
--end-to-end --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym "), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", 
parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym "), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile"), + # (parent_model_dir / 
"Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), + # 
(parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), + # + # + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / 
"phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + + (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "int4_acc/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "int4_acc/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end"), ] for model_dir, log_dir, params in experiment_params: