Skip to content

Commit

Permalink
Add fp8 test to jenkins CI (#429)
Browse files Browse the repository at this point in the history
  • Loading branch information
afierka-intel committed Oct 30, 2024
1 parent 2df2f1f commit 049d9dc
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# FIXME(kzawora): these scores were generated using vLLM on HPU, we need to confirm them on HF
# VLLM_SKIP_WARMUP=true bash run-lm-eval-gsm-cot-llama-vllm-baseline.sh -m "/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B-Instruct" -b 128 -l 1319 -f 8 -t 1
model_name: "/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B-Instruct"
tasks:
- name: "gsm8k_cot_llama"
metrics:
- name: "exact_match,strict-match"
value: 0.8317
- name: "exact_match,flexible-extract"
value: 0.8355
limit: null
num_fewshot: 8
dtype: "bfloat16"
fewshot_as_multiturn: true
apply_chat_template: true
fp8: true
1 change: 1 addition & 0 deletions .jenkins/lm-eval-harness/configs/models-fp8.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Meta-Llama-3.1-8B-Instruct-fp8.yaml
19 changes: 15 additions & 4 deletions .jenkins/lm-eval-harness/test_lm_eval_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@
TP_SIZE = os.environ.get("LM_EVAL_TP_SIZE", 1)


def setup_fp8(model_path, device_type):
    """Configure the environment for FP8 (INC) quantized inference.

    Derives the Gaudi hardware flavor from the last character of
    *device_type* (e.g. "g2"/"g3") and points QUANT_CONFIG at the
    pre-generated maxabs quantization JSON for this model.
    """
    hw_flavor = f"g{device_type[-1]}"
    model_dir_name = Path(model_path).parts[-1].lower()
    quant_config_path = \
        f"/software/data/vllm-benchmarks/inc/{model_dir_name}/maxabs_quant_{hw_flavor}.json"
    os.environ["QUANT_CONFIG"] = quant_config_path


def fail_on_exit():
    """Terminate the process immediately with exit status 1.

    Uses os._exit (not sys.exit) so no cleanup handlers, finally
    blocks, or atexit hooks run — presumably to force the CI job to
    register a hard failure; verify against caller.
    """
    os._exit(1)

Expand All @@ -42,10 +50,10 @@ def launch_lm_eval(eval_config):
f"max_model_len=4096," \
f"max_num_seqs={max_num_seqs}," \
f"trust_remote_code={trust_remote_code}"
if eval_config.get("num_scheduler_steps"):
model_args += \
f",num_scheduler_steps={eval_config.get('num_scheduler_steps')}"
print(f"MODEL_ARGS: {model_args}")
if eval_config.get("fp8"):
model_args += ",quantization=inc," \
"kv_cache_dtype=fp8_inc," \
"weights_load_device=cpu"
kwargs = {}
if 'fewshot_as_multiturn' in eval_config:
kwargs['fewshot_as_multiturn'] = eval_config['fewshot_as_multiturn']
Expand Down Expand Up @@ -138,6 +146,9 @@ def test_lm_eval_correctness(record_xml_attribute, record_property):
f'tp{TP_SIZE}')
record_xml_attribute("name", testname)

# Set up environment for FP8 inference
if eval_config.get("fp8"):
setup_fp8(eval_config["model_name"], platform)
# Launch eval requests.
start_time = time.perf_counter()
results = launch_lm_eval(eval_config)
Expand Down
5 changes: 5 additions & 0 deletions .jenkins/test_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ stages:
- name: gsm8k_large_g2_tp4
flavor: g2.m
command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 4
- name: test_gsm8k_fp8
steps:
- name: gsm8k_small_g3_tp1_fp8
flavor: g3
command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-fp8.txt -t 1

0 comments on commit 049d9dc

Please sign in to comment.