upgrade lm_eval to 0.4.2 following ITREX (#1727)
Signed-off-by: xin3he <[email protected]>
Co-authored-by: chensuyue <[email protected]>
xin3he and chensuyue authored Apr 25, 2024
1 parent 4351bc8 commit fdb5097
Showing 8 changed files with 97 additions and 79 deletions.
1 change: 1 addition & 0 deletions .azure-pipelines/model-test-3x.yml
@@ -10,6 +10,7 @@ pr:
include:
- neural_compressor/common
- neural_compressor/torch
- examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm
- setup.py
- requirements_pt.txt
- .azure-pipelines/scripts/models
1 change: 1 addition & 0 deletions .github/checkgroup.yml
@@ -64,6 +64,7 @@ subprojects:
paths:
- "neural_compressor/common/**"
- "neural_compressor/torch/**"
- "examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/**"
- "setup.py"
- "requirements_pt.txt"
- ".azure-pipelines/scripts/models/**"
@@ -9,5 +9,5 @@ wandb
einops
neural-compressor
intel-extension-for-transformers
lm-eval
lm_eval==0.4.2
peft
@@ -51,7 +51,7 @@
parser.add_argument("--calib_iters", default=512, type=int,
help="calibration iters.")
parser.add_argument("--tasks", default="lambada_openai,hellaswag,winogrande,piqa,wikitext",
type=str, help="tasks list for accuracy validation")
type=str, help="tasks for accuracy validation")
parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
# ============SmoothQuant configs==============
parser.add_argument("--sq", action="store_true")
@@ -372,39 +372,36 @@ def run_fn(model):
)
user_model.save(args.output_dir)

if args.int8 or args.int8_bf16_mixed:
print("load int8 model")

from neural_compressor.torch.algorithms.static_quant import load
# TODO: we need run_benchmark.sh for loading and remove --accuracy in run_quant.sh, currently run_quant.sh will get fp32 result
# if args.int8 or args.int8_bf16_mixed:
# print("load int8 model")

if args.ipex:
user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
else:
# TODO: WOQ save&load
print("Int8 model loading does not support WeightOnlyQuant now.")
pass
else:
user_model, _ = get_user_model()
# # TODO: from neural_compressor.torch.quantization import load
# from neural_compressor.torch.algorithms.static_quant import load

# if args.ipex:
# user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
# else:
# # TODO: WOQ save&load
# print("Int8 model loading does not support WeightOnlyQuant now.")
# pass
# else:
# user_model, _ = get_user_model()


if args.accuracy:
user_model.eval()
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
eval_args = LMEvalParser(
model="hf",
model_args='pretrained=' + args.model + ',tokenizer=' + args.model + ',dtype=float32',
model="hf",
user_model=user_model,
tokenizer = tokenizer,
tokenizer=tokenizer,
batch_size=args.batch_size,
tasks=args.tasks,
device="cpu",
)
results = evaluate(eval_args)

dumped = json.dumps(results, indent=2)
if args.save_accuracy_path:
with open(args.save_accuracy_path, "w") as f:
f.write(dumped)
for task_name in args.tasks.split(","):
if task_name == "wikitext":
acc = results["results"][task_name]["word_perplexity,none"]
@@ -415,16 +412,14 @@ def run_fn(model):

if args.performance:
user_model.eval()
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
import time

samples = args.iters * args.batch_size
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
eval_args = LMEvalParser(
model="hf",
model_args='pretrained=' + args.model + ',tokenizer=' + args.model + ',dtype=float32',
model="hf",
user_model=user_model,
tokenizer = tokenizer,
tokenizer=tokenizer,
batch_size=args.batch_size,
tasks=args.tasks,
limit=samples,
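
For reference, the evaluation flow these scripts move to can be sketched on its own. This is a minimal sketch assuming the ITREX wrapper import shown in the hunks above; the checkpoint name, task list, and batch size are placeholders, not values taken from the commit.

```python
# Minimal sketch of the lm_eval 0.4.2 evaluation call via the ITREX
# (intel-extension-for-transformers) wrapper adopted above.
# The checkpoint, tasks, and batch size are placeholder choices.
from transformers import AutoModelForCausalLM, AutoTokenizer
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import (
    LMEvalParser,
    evaluate,
)

model_name = "facebook/opt-125m"  # placeholder checkpoint
user_model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

eval_args = LMEvalParser(
    model="hf",                # Hugging Face backend
    user_model=user_model,     # evaluate the in-memory (e.g. quantized) model directly
    tokenizer=tokenizer,
    tasks="lambada_openai,piqa",
    batch_size=8,
    device="cpu",
)
results = evaluate(eval_args)

# lm_eval 0.4.x reports metrics under "<metric>,<filter>" keys.
for task_name in "lambada_openai,piqa".split(","):
    print(task_name, results["results"][task_name]["acc,none"])
```

Passing user_model lets the already-quantized model be evaluated without reloading from args.model, which appears to be why the model_args string with pretrained=... is dropped from the new calls.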
@@ -1,7 +1,7 @@
accelerate
datasets
einops
intel_extension_for_transformers
intel-extension-for-transformers
optimum
peft
sentencepiece
Expand All @@ -10,4 +10,4 @@ torch
tqdm
tiktoken
transformers_stream_generator
git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2
lm_eval==0.4.2
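
The git-pinned EleutherAI harness above is replaced by the released lm_eval 0.4.2 package. As a rough sanity check of that release outside the ITREX wrapper, the stock harness API can be exercised directly; the sketch below assumes the standard lm_eval 0.4.x interface, and the checkpoint and task are placeholders.

```python
# Rough sketch against the stock lm_eval 0.4.x API (not the ITREX wrapper);
# the checkpoint and task are placeholder choices.
import lm_eval
from lm_eval.models.huggingface import HFLM

lm = HFLM(pretrained="facebook/opt-125m", batch_size=8)  # placeholder model
results = lm_eval.simple_evaluate(model=lm, tasks=["lambada_openai"])
print(results["results"]["lambada_openai"]["acc,none"])
```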
@@ -51,7 +51,6 @@ def skip(*args, **kwargs):
from timers import CPUTimer, GPUTimer
from neural_compressor.training import WeightPruningConfig, prepare_pruning
from neural_compressor.compression.pruner import (parse_auto_slim_config)
from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate

check_min_version("4.27.0.dev0")
logger = logging.getLogger(__name__)
@@ -271,8 +270,8 @@ def parse_args():
help="Transformers parameter: use the external repo")

# Evaluation config
parser.add_argument("--tasks", default=["lambada_openai"],
help="Usually chosen with ['lambada_openai','hellaswag','winogrande','piqa']",
parser.add_argument("--tasks", default="lambada_openai",
type=str, help="tasks for accuracy validation",
)
parser.add_argument("--use_accelerate", action='store_true',
help="Usually use to accelerate evaluation for large models"
@@ -588,14 +587,17 @@ def group_texts(examples):
model_args = f'pretrained={model_name},tokenizer={model_name},dtype={dtype},use_accelerate={args.use_accelerate},trust_remote_code={args.trust_remote_code}'
eval_batch = args.per_device_eval_batch_size
user_model = None if args.use_accelerate else model
results = evaluate(
model="hf-causal",
model_args=model_args,
user_model=user_model,
batch_size=eval_batch,
tasks=args.tasks,
device=device,
)

from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
eval_args = LMEvalParser(
model="hf",
user_model=user_model,
tokenizer=tokenizer,
batch_size=eval_batch,
tasks=args.tasks,
device=device,
)
results = evaluate(eval_args)

if __name__ == "__main__":
main()
@@ -10,4 +10,4 @@ wandb
einops
neural-compressor
intel-extension-for-transformers
git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2
lm_eval==0.4.2
@@ -50,9 +50,8 @@
help="Pad input ids to max length.")
parser.add_argument("--calib_iters", default=512, type=int,
help="calibration iters.")
parser.add_argument("--tasks", nargs='+', default=["lambada_openai",
"hellaswag", "winogrande", "piqa", "wikitext"],
type=str, help="tasks list for accuracy validation, text-generation and code-generation tasks are different.")
parser.add_argument("--tasks", default="lambada_openai,hellaswag,winogrande,piqa,wikitext",
type=str, help="tasks for accuracy validation, text-generation and code-generation tasks are different.")
parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
# ============SmoothQuant configs==============
parser.add_argument("--sq", action="store_true")
@@ -351,62 +350,82 @@ def eval_func(model):
if args.accuracy:
user_model.eval()
if args.code_generation:
from intel_extension_for_transformers.llm.evaluation.lm_code_eval import evaluate
from intel_extension_for_transformers.transformers.llm.evaluation.bigcode_eval import evaluate
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
results = evaluate(
model=user_model,
tokenizer=tokenizer,
tasks=",".join(args.tasks),
tasks=args.tasks,
batch_size=args.batch_size,
args=args,
)
for task_name in args.tasks:
if task_name == "truthfulqa_mc":
acc = results["results"][task_name]["mc1"]
else:
acc = results["results"][task_name]["acc"]
else:
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate
results = evaluate(
model="hf-causal",
model_args='pretrained=' + args.model + ',tokenizer=' + args.model + ',dtype=float32',
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
eval_args = LMEvalParser(
model="hf",
user_model=user_model,
tokenizer=tokenizer,
batch_size=args.batch_size,
tasks=args.tasks,
device="cpu",
)
results = evaluate(eval_args)
for task_name in args.tasks.split(","):
if task_name == "wikitext":
acc = results["results"][task_name]["word_perplexity,none"]
else:
acc = results["results"][task_name]["acc,none"]

dumped = json.dumps(results, indent=2)
if args.save_accuracy_path:
with open(args.save_accuracy_path, "w") as f:
f.write(dumped)
for task_name in args.tasks:
if task_name == "wikitext":
acc = results["results"][task_name]["word_perplexity"]
else:
acc = results["results"][task_name]["acc"]
print("Accuracy: %.5f" % acc)
print('Batch size = %d' % args.batch_size)

if args.performance:
user_model.eval()
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate
import time

user_model.eval()
samples = args.iters * args.batch_size
start = time.time()
results = evaluate(
model="hf-causal",
model_args='pretrained=' + args.model + ',tokenizer=' + args.model \
+ ',dtype=float32' + ",trust_remote_code=" + str(args.trust_remote_code),
user_model=user_model,
batch_size=args.batch_size,
tasks=args.tasks,
limit=samples,
)
end = time.time()
for task_name in args.tasks:
if task_name == "wikitext":
acc = results["results"][task_name]["word_perplexity"]
elif task_name == "truthfulqa_mc":
acc = results["results"][task_name]["mc1"]
else:
acc = results["results"][task_name]["acc"]

if args.code_generation:
from intel_extension_for_transformers.transformers.llm.evaluation.bigcode_eval import evaluate
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
start = time.time()
results = evaluate(
model=user_model,
tokenizer=tokenizer,
tasks=args.tasks,
batch_size=args.batch_size,
args=args,
)
end = time.time()
for task_name in args.tasks:
if task_name == "truthfulqa_mc":
acc = results["results"][task_name]["mc1"]
else:
acc = results["results"][task_name]["acc"]
else:
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
eval_args = LMEvalParser(
model="hf",
user_model=user_model,
tokenizer=tokenizer,
batch_size=args.batch_size,
tasks=args.tasks,
device="cpu",
)
start = time.time()
results = evaluate(eval_args)
end = time.time()
for task_name in args.tasks.split(","):
if task_name == "wikitext":
acc = results["results"][task_name]["word_perplexity,none"]
else:
acc = results["results"][task_name]["acc,none"]
print("Accuracy: %.5f" % acc)
print('Throughput: %.3f samples/sec' % (samples / (end - start)))
print('Latency: %.3f ms' % ((end - start) * 1000 / samples))
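
A pattern that recurs across these hunks is the metric-key rename in the results dict: the older harness exposed bare metric names ("acc", "word_perplexity", "mc1"), while lm_eval 0.4.x suffixes the filter name ("acc,none", "word_perplexity,none"). A small, hypothetical helper that tolerates both forms:

```python
# Hypothetical helper mirroring the key rename visible in the hunks above:
# lm_eval 0.4.x stores metrics as "<metric>,<filter>" (e.g. "acc,none"),
# while earlier harness versions used the bare metric name.
def task_metric(results: dict, task_name: str) -> float:
    metric = "word_perplexity" if task_name == "wikitext" else "acc"
    task_results = results["results"][task_name]
    return task_results.get(f"{metric},none", task_results.get(metric))
```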