Skip to content

Commit

Permalink
Add quant workflow to validate.sh (#229)
Browse files Browse the repository at this point in the history
  • Loading branch information
guangy10 authored and malfet committed Jul 17, 2024
1 parent 4c5c401 commit e129505
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 254 deletions.
125 changes: 120 additions & 5 deletions .ci/scripts/validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ function generate_eager_model_output() {
local TARGET_DEVICE="${2:-cpu}"
local MODEL_DIR="${CHECKPOINT_PATH%/*}"
local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
echo "Run inference with eager model for $MODEL_NAME"
echo "Run inference with eager model"
python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --prompt "$PROMPT" --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" || exit 1
cat "$MODEL_DIR/output_eager"
}
Expand All @@ -24,28 +24,143 @@ function generate_compiled_model_output() {
local TARGET_DEVICE="${2:-cpu}"
local MODEL_DIR="${CHECKPOINT_PATH%/*}"
local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
echo ""############### Run inference with torch.compile for $MODEL_NAME "###############"

echo ""############### Run inference with torch.compile "###############"
echo ""
echo "******************************************"
echo "************** non-quantized *************"
echo "******************************************"
python -W ignore generate.py --compile --checkpoint-path "$CHECKPOINT_PATH" --prompt "$PROMPT" --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
cat "$MODEL_DIR/output_compiled"

echo "******************************************"
echo "******* Emb: channel-wise quantized ******"
echo "******************************************"
python -W ignore generate.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" || exit 1
cat "$MODEL_DIR/output_eager"
python -W ignore generate.py --compile --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
cat "$MODEL_DIR/output_compiled"

echo "******************************************"
echo "******** Emb: group-wise quantized *******"
echo "******************************************"
python -W ignore generate.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" || exit 1
cat "$MODEL_DIR/output_eager"
python -W ignore generate.py --compile --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
cat "$MODEL_DIR/output_compiled"

echo "***********************************************"
echo "******* Emb: 4bit channel-wise quantized ******"
echo "***********************************************"
python -W ignore generate.py --quant '{"embedding" : {"bitwidth": 4, "groupsize": 0, "packed": "True"}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" || exit 1
cat "$MODEL_DIR/output_eager"
python -W ignore generate.py --compile --quant '{"embedding" : {"bitwidth": 4, "groupsize": 0, "packed": "True"}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
cat "$MODEL_DIR/output_compiled"

echo "***********************************************"
echo "******** Emb: 4bit group-wise quantized *******"
echo "***********************************************"
python -W ignore generate.py --quant '{"embedding" : {"bitwidth": 4, "groupsize": 8, "packed": "True"}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" || exit 1
cat "$MODEL_DIR/output_eager"
python -W ignore generate.py --compile --quant '{"embedding" : {"bitwidth": 4, "groupsize": 8, "packed": "True"}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
cat "$MODEL_DIR/output_compiled"

echo "******************************************"
echo "******* INT8 channel-wise quantized ******"
echo "******************************************"
python -W ignore generate.py --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" || exit 1
cat "$MODEL_DIR/output_eager"
python -W ignore generate.py --compile --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
cat "$MODEL_DIR/output_compiled"

echo "******************************************"
echo "******** INT8 group-wise quantized *******"
echo "******************************************"
python -W ignore generate.py --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" || exit 1
cat "$MODEL_DIR/output_eager"
python -W ignore generate.py --compile --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
cat "$MODEL_DIR/output_compiled"

echo "******************************************"
echo "******** INT4 group-wise quantized *******"
echo "******************************************"
python -W ignore generate.py --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" || exit 1
cat "$MODEL_DIR/output_eager"
python -W ignore generate.py --compile --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
cat "$MODEL_DIR/output_compiled"
}

function generate_aoti_model_output() {
local CHECKPOINT_PATH="$1"
local TARGET_DEVICE="${2:-cpu}"
local MODEL_DIR="${CHECKPOINT_PATH%/*}"
local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
echo ""############### Run inference with AOTInductor for $MODEL_NAME "###############"
python -W ignore export.py --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path "${MODEL_DIR}/${MODEL_NAME}.so" --device "$TARGET_DEVICE"

echo ""############### Run inference with AOT Inductor "###############"
echo ""
echo "******************************************"
echo "************** non-quantized *************"
echo "******************************************"
python -W ignore export.py --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path "${MODEL_DIR}/${MODEL_NAME}.so" --device "$TARGET_DEVICE" || exit 1
python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --dso-path "$MODEL_DIR/${MODEL_NAME}.so" --prompt "$PROMPT" --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
cat "$MODEL_DIR/output_aoti"

echo "******************************************"
echo "******* Emb: channel-wise quantized ******"
echo "******************************************"
python -W ignore export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
cat "$MODEL_DIR/output_aoti"

echo "******************************************"
echo "******** Emb: group-wise quantized *******"
echo "******************************************"
python -W ignore export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
cat "$MODEL_DIR/output_aoti"

echo "***********************************************"
echo "******* Emb: 4bit channel-wise quantized ******"
echo "***********************************************"
python -W ignore export.py --quant '{"embedding" : {"bitwidth": 4, "groupsize": 0, "packed": "True"}}' --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
cat "$MODEL_DIR/output_aoti"

echo "***********************************************"
echo "******** Emb: 4bit group-wise quantized *******"
echo "***********************************************"
python -W ignore export.py --quant '{"embedding" : {"bitwidth": 4, "groupsize": 8, "packed": "True"}}' --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
cat "$MODEL_DIR/output_aoti"

echo "******************************************"
echo "******* INT8 channel-wise quantized ******"
echo "******************************************"
python -W ignore export.py --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
cat "$MODEL_DIR/output_aoti"

echo "******************************************"
echo "******** INT8 group-wise quantized *******"
echo "******************************************"
python -W ignore export.py --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
cat "$MODEL_DIR/output_aoti"

echo "******************************************"
echo "******** INT4 group-wise quantized *******"
echo "******************************************"
python -W ignore export.py --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
cat "$MODEL_DIR/output_aoti"
}

function generate_executorch_model_output() {
local CHECKPOINT_PATH="$1"
local TARGET_DEVICE="${2:-cpu}"
local MODEL_DIR="${CHECKPOINT_PATH%/*}"
local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
echo ""############### Run inference with ExecuTorch using XNNPACK for $MODEL_NAME "###############"
echo ""############### Run inference with ExecuTorch using XNNPACK "###############"
python -W ignore export.py --checkpoint-path "$CHECKPOINT_PATH" --output-pte-path "$MODEL_DIR/${MODEL_NAME}.pte" -d "fp32" || exit 1
python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --prompt "$PROMPT" --device "$TARGET_DEVICE" --pte-path "$MODEL_DIR/${MODEL_NAME}.pte" > "$MODEL_DIR/output_et" || exit 1
cat "$MODEL_DIR/output_et"
Expand Down
Loading

0 comments on commit e129505

Please sign in to comment.