Skip to content

Commit

Permalink
Extend how optimum-intel configs are inherited.
Browse files · Browse the repository at this point in the history
  • Loading branch information
nikita-savelyevv committed Jul 25, 2024
1 parent 14f9c2b commit 94a2d38
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 56 deletions.
48 changes: 34 additions & 14 deletions llm_bench/python/utils/conversion_utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
from nncf import Dataset
from openvino import save_model
import nncf
from ..nncf_utils import COMPRESSION_OPTIONS, INT4_MODEL_CONFIGURATION
from optimum.intel.openvino.configuration import _check_default_4bit_configs
from ..nncf_utils import COMPRESSION_OPTIONS
from optimum.intel.openvino.configuration import get_default_int4_config, _DEFAULT_4BIT_CONFIG, OVQuantizationMethod
import warnings


Expand Down Expand Up @@ -157,7 +157,7 @@ def get_data_aware_args(ov_model, tokenizer, config, compression_args, args):
dataset_args = compression_args['dataset']
dataset_params = dataset_args['name']
if 'sensitivity_metric' in dataset_args:
res['mode'] = dataset_args['sensitivity_metric']
res['sensitivity_metric'] = dataset_args['sensitivity_metric']
if 'awq' in dataset_args:
res['awq'] = dataset_args['awq']
if 'scale_estimation' in dataset_args:
Expand All @@ -172,7 +172,7 @@ def get_data_aware_args(ov_model, tokenizer, config, compression_args, args):
if dataset_params is not None:
# for example "wikitext,wikitext-2-v1,train[:1000],text"
path, name, split, item_name = dataset_params.split(',')
dataset = load_dataset(path, name, split=split)
dataset = load_dataset(path, name, split=split, streaming="allenai/c4" in path)

if path == 'wikitext':
# filter short sentences
Expand All @@ -189,17 +189,37 @@ def compress_ov_model_weights_helper(ov_model, tok, config, out_path, compress_w
if "INT8" in compress_weights_format and "INT8_ASYM" in COMPRESSION_OPTIONS:
warnings.warn("Usage INT8 mode is deprecated and will be removed soon. Please use INT8_ASYM instead", DeprecationWarning)
if "4BIT_DEFAULT" in compress_weights_format:
compression_args = _check_default_4bit_configs(config)
if compression_args:
sym = compression_args.pop("sym", False)
compression_args.pop("bits", 4)
compression_args["mode"] = nncf.CompressWeightsMode.INT4_SYM if sym else nncf.CompressWeightsMode.INT4_ASYM
if compression_args is None:
model_id = out_path.parents[3].name
if model_id in INT4_MODEL_CONFIGURATION:
compression_args = INT4_MODEL_CONFIGURATION[model_id]
compression_args = get_default_int4_config(config.name_or_path)
compression_args.pop("bits")

sym = compression_args.pop("sym", _DEFAULT_4BIT_CONFIG["sym"])
compression_args["mode"] = nncf.CompressWeightsMode.INT4_SYM if sym else nncf.CompressWeightsMode.INT4_ASYM

quant_method = compression_args.pop("quant_method", None)
scale_estimation = compression_args.pop("scale_estimation", False)
sensitivity_metric = compression_args.pop("sensitivity_metric", None)
num_samples = compression_args.pop("num_samples", None)
if num_samples:
compression_args["subset_size"] = num_samples
dataset = compression_args.pop("dataset", None)
if dataset:
if dataset == "wikitext2":
dataset_name = "wikitext,wikitext-2-v1,train[:1000],text"
elif dataset == "c4" or dataset == "c4-new":
dataset_name = "allenai/c4,en,train,text"
else:
compression_args = COMPRESSION_OPTIONS["INT4_ASYM"]
raise ValueError(f"Unrecognized dataset: {dataset}")

dataset_args = {"name": dataset_name}
if quant_method == OVQuantizationMethod.AWQ:
dataset_args["awq"] = True
elif quant_method is not None:
raise ValueError(f"Unrecognised quant_method: {quant_method}")
if scale_estimation:
dataset_args["scale_estimation"] = True
if sensitivity_metric:
dataset_args["sensitivity_metric"] = sensitivity_metric
compression_args["dataset"] = dataset_args

if compression_args is None:
compression_args = COMPRESSION_OPTIONS[compress_weights_format]
Expand Down
42 changes: 0 additions & 42 deletions llm_bench/python/utils/nncf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,45 +35,3 @@

def get_compressed_path(output_dir: str, base_precision, option: str):
return Path(output_dir) / "pytorch/dldt/compressed_weights" / f"OV_{base_precision}-{option}"


INT4_MODEL_CONFIGURATION = {
"dolly-v2-3b": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 0.8},
"gpt-j-6b": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 64},
"opt-6.7b": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 64, "ratio": 0.8},
"red-pajama-incite-7b-instruct": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128},
"zephyr-7b-beta": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8,
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
"llama-2-7b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.6},
"llama-2-7b-chat": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8},
"llama-2-13b-chat": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8},
"stablelm-3b-4e1t": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8,
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
"stablelm-epoch-3b-preview": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8,
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
"stable-zephyr-3b-dpo": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 1.0,
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
"stable-code-3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8},
"rocket-3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8},
"chatglm2-6b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.72},
"qwen-7b-chat": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.6},
"open-llama-3b": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 64, "ratio": 1.0, "all_layers": True},
"falcon-7b-instruct": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True},
"orca-mini-3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True,
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": False}},
"bloomz-560m": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8,
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
"mixtral-8x7b-v0.1": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8},
"baichuan2-7b-chat": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8,
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
"mistral-7b-v0.1": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.9},
"llama-7b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.7},
"opt-2.7b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.7},
"red-pajama-incite-chat-3b-v1": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 0.8},
"vicuna-7b-v1.5": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 1.0},
"stablelm-tuned-alpha-3b": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 0.8},
"longchat-b7": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 0.9},
"starcoder2-3b": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 0.9},
"tiny-llama-1.1b-chat": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 0.8},
"phi-2": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 0.9},
}

0 comments on commit 94a2d38

Please sign in to comment.