Improve logging in LLM tune hyperparams example (openvinotoolkit#2470)
### Changes

Print info about the `all_layers` parameter more clearly in the log output
CI examples build: 242
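
As a hedged illustration (all values below are made up, not taken from a real run), the similarity log line used to read:

```
The similarity of model compressed with group_size=64, ratio=0.8, awq=False is 0.947
```

and now explicitly marks runs where embeddings and last MatMul layers were also compressed:

```
The similarity of model compressed with group_size=64, ratio=0.8, awq=False, all_layers=True is 0.947
```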

### Reason for changes

### Related tickets

### Tests

l-bat authored Feb 14, 2024
1 parent 287ec6b commit 4c360c9
Showing 1 changed file with 22 additions and 19 deletions.
```diff
@@ -88,10 +88,11 @@ def evaluate_model(
     group_size = optimized_model.get_rt_info()["nncf"]["weight_compression"]["group_size"].value
     ratio = float(optimized_model.get_rt_info()["nncf"]["weight_compression"]["ratio"].value)
     awq = optimized_model.get_rt_info()["nncf"]["weight_compression"]["awq"].value
-    nncf_logger.info(
-        "The similarity of model compressed with "
-        f"group_size={group_size}, ratio={ratio:.1f}, awq={awq} is {similarity:.3f}"
-    )
+    all_layers = optimized_model.get_rt_info()["nncf"]["weight_compression"]["all_layers"].value
+    params_info = f"The similarity of model compressed with group_size={group_size}, ratio={ratio:.1f}, awq={awq}"
+    if all_layers == "True":
+        params_info = params_info + ", all_layers=True"
+    nncf_logger.info(params_info + f" is {similarity:.3f}")
     return similarity
```


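For reference, here is a minimal standalone sketch of the message assembly introduced above, with the rt_info lookups replaced by plain arguments so it runs on its own; the values in the final call are illustrative, and the `nncf_logger` import path is assumed to be NNCF's common logging module:

```python
from nncf.common.logging import nncf_logger


def log_similarity(group_size: int, ratio: float, awq: bool, all_layers: str, similarity: float) -> None:
    # Mirror of the updated evaluate_model logging: always report group_size,
    # ratio and awq, and append all_layers only when it was enabled.
    params_info = f"The similarity of model compressed with group_size={group_size}, ratio={ratio:.1f}, awq={awq}"
    if all_layers == "True":
        params_info = params_info + ", all_layers=True"
    nncf_logger.info(params_info + f" is {similarity:.3f}")


# Illustrative call; all_layers is passed as a string because the example
# compares it against "True" after reading it from rt_info.
log_similarity(group_size=64, ratio=0.8, awq=False, all_layers="True", similarity=0.947)
```
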
```diff
@@ -112,21 +113,25 @@ def get_nncf_dataset(
     return nncf.Dataset(data_source)
 
 
-def print_results(optimized_model: ov.Model, ratio: float, group_size: int, awq: bool, similarity: float) -> None:
+def print_results(optimized_model: ov.Model, similarity: float) -> None:
     """
     Print report with optimization details, memory footprint, and similarity score.
     :param optimized_model: The OpenVINO model with compressed weights.
-    :param ratio: The ratio between baseline and backup precisions
-    :param group_size: Number of weights (e.g. 128) in the channel dimension
-        that share quantization parameters (scale).
-    :param awq: Indicates whether use AWQ weights correction.
     :param similarity: The similarity score between the original and optimized models.
     """
     ov.save_model(optimized_model, MODEL_PATH)
-    footprint = Path(MODEL_PATH).with_suffix(".bin").stat().st_size
     print(f"Compressed model was saved to: {MODEL_PATH}")
-    print(f"Best parameters: group_size={group_size}, ratio={ratio:.1f}, awq={awq}")
+    group_size = optimized_model.get_rt_info()["nncf"]["weight_compression"]["group_size"].value
+    ratio = float(optimized_model.get_rt_info()["nncf"]["weight_compression"]["ratio"].value)
+    awq = optimized_model.get_rt_info()["nncf"]["weight_compression"]["awq"].value
+    all_layers = optimized_model.get_rt_info()["nncf"]["weight_compression"]["all_layers"].value
+    best_params_info = f"Best parameters: group_size={group_size}, ratio={ratio:.1f}, awq={awq}"
+    if all_layers == "True":
+        print(best_params_info + ", all_layers=True")
+    else:
+        print(best_params_info)
+    footprint = Path(MODEL_PATH).with_suffix(".bin").stat().st_size
     print(f"Memory footprint: {footprint / 2**20 :.2f} MB")
     print(f"Similarity: {similarity:.2f}")
```

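The `print_results` report now reads the compression settings back from the model itself: the compressed `ov.Model` carries them under the `"nncf"` / `"weight_compression"` keys of its rt_info, which is exactly what the lookups above rely on. A hedged helper that just factors out those lookups (it is not part of the example and assumes a model produced by `nncf.compress_weights`):

```python
import openvino as ov


def read_weight_compression_params(optimized_model: ov.Model) -> dict:
    # Same rt_info lookups as in print_results above; the entries are wrapper
    # objects, hence the .value accesses.
    wc_info = optimized_model.get_rt_info()["nncf"]["weight_compression"]
    return {
        "group_size": wc_info["group_size"].value,
        "ratio": float(wc_info["ratio"].value),
        "awq": wc_info["awq"].value,
        "all_layers": wc_info["all_layers"].value,
    }
```
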
```diff
@@ -155,7 +160,6 @@ def find_parameters(
     optimized_model = compress_model(original_ov_model, nncf_dataset, ratio, group_size, awq=use_awq)
     similarity = evaluate_fn(optimized_model=optimized_model)
     if similarity >= 1 - MAX_DROP:
-        nncf_logger.info(f"Compress embeddings and last layers to {COMPRESSION_MODE.value} precision")
         # If model with the maximum ratio and group_size is acceptable,
         # we try to compress embeddings and last MatMul layers to a primary precision
         full_optimized_model = nncf.compress_weights(
@@ -167,18 +171,17 @@
         )
         all_layers_similarity = evaluate_fn(optimized_model=full_optimized_model)
         if all_layers_similarity >= 1 - MAX_DROP:
-            print("Compressed embeddings and last layers to a primary precision.")
-            print_results(full_optimized_model, ratio, group_size, use_awq, all_layers_similarity)
+            print_results(full_optimized_model, all_layers_similarity)
         else:
-            print_results(optimized_model, ratio, group_size, use_awq, similarity)
+            print_results(optimized_model, similarity)
         return use_awq, ratio, group_size
 
     # If the best performing model is not acceptable, we try to use AWQ weights correction and compare similarity
     use_awq = True
     optimized_model = compress_model(original_ov_model, nncf_dataset, ratio, group_size, awq=use_awq)
     awq_similarity = evaluate_fn(optimized_model=optimized_model)
     if awq_similarity >= 1 - MAX_DROP:
-        print_results(optimized_model, ratio, group_size, use_awq, awq_similarity)
+        print_results(optimized_model, awq_similarity)
         return use_awq, ratio, group_size
     use_awq = awq_similarity > similarity
```

```diff
@@ -193,19 +196,19 @@ def find_parameters(
         "but it could not achieve the required accuracy drop. "
         "We recommend choosing a different mode for weight compression."
     )
-    print_results(optimized_model, ratio, group_size, use_awq, similarity)
+    print_results(optimized_model, similarity)
     return use_awq, ratio, group_size
 
     # If max drop criterion is achivable, we run a grid-search to find the best parameters
     for ratio, group_size in param_grid[1:-1]:
         optimized_model = compress_model(original_ov_model, nncf_dataset, ratio, group_size, awq=use_awq)
         similarity = evaluate_fn(optimized_model=optimized_model)
         if similarity >= 1 - MAX_DROP:
-            print_results(optimized_model, ratio, group_size, use_awq, similarity)
+            print_results(optimized_model, similarity)
             return use_awq, ratio, group_size
 
     optimized_model = compress_model(original_ov_model, nncf_dataset, MIN_RATIO, MIN_GROUP_SIZE, awq=use_awq)
-    print_results(optimized_model, MIN_RATIO, MIN_GROUP_SIZE, use_awq, similarity)
+    print_results(optimized_model, similarity)
     return use_awq, MIN_RATIO, MIN_GROUP_SIZE
```


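The `similarity >= 1 - MAX_DROP` comparison seen throughout `find_parameters` is the accept/reject rule for every candidate configuration. A minimal sketch of that rule, assuming an illustrative `MAX_DROP` of 0.01 (the real constant is defined elsewhere in the example and may differ):

```python
MAX_DROP = 0.01  # illustrative value, not taken from the example


def is_acceptable(similarity: float, max_drop: float = MAX_DROP) -> bool:
    # A candidate is kept if its similarity to the original model dropped
    # by at most max_drop.
    return similarity >= 1 - max_drop


print(is_acceptable(0.995))  # True: a drop of 0.005 stays within the budget
print(is_acceptable(0.980))  # False: a drop of 0.020 exceeds it
```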