# Skip to content
#
# Update cli.py with inline comments #416
#
# Update cli.py with inline comments
#
# Update cli.py with inline comments #416
#
# Workflow file for this run

# Runs generate.py in eager mode with `--device mps`: once unquantized, then
# with embedding, linear:int8 and linear:int4 quantization settings.
name: Run eager tests on MPS

on:
  pull_request:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  test-mps:
    # Reusable workflow call: the runner label and the shell script below are
    # handed over as the `runner` and `script` inputs.
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-m1-stable
      script: |
        # Same options as the former `set -eou pipefail`, in canonical order:
        # exit on error, error on unset variables, fail a pipeline if any
        # stage fails.
        set -euo pipefail

        echo "::group::Print machine info"
        uname -a
        # Quoted substitution and POSIX `=` so the test cannot break on an
        # empty or multi-word result.
        if [ "$(uname -s)" = "Darwin" ]; then
          sysctl machdep.cpu.brand_string
          sysctl machdep.cpu.core_count
        fi
        echo "::endgroup::"

        echo "::group::Install requirements"
        # Install requirements
        pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
        ls -la
        pwd
        pip install -r requirements.txt
        echo "::endgroup::"

        echo "::group::Download checkpoints"
        # Subshell keeps the directory change local to the download step.
        (
          mkdir -p checkpoints/stories15M
          pushd checkpoints/stories15M
          curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
          curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          popd
        )
        echo "::endgroup::"

        echo "::group::Run inference"
        export MODEL_PATH=checkpoints/stories15M/stories15M.pt
        export MODEL_NAME=stories15M
        export MODEL_DIR=/tmp

        # Each run overwrites ./output_eager and then prints it to the job log.
        python generate.py --device mps --checkpoint-path "${MODEL_PATH}" --temperature 0 > ./output_eager
        cat ./output_eager

        echo "************************************************************"
        echo "*** embedding"
        echo "************************************************************"
        python generate.py --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path "${MODEL_PATH}" --temperature 0 > ./output_eager
        cat ./output_eager
        python generate.py --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path "${MODEL_PATH}" --temperature 0 > ./output_eager
        cat ./output_eager

        echo "************************************************************"
        echo "*** linear int8"
        echo "************************************************************"
        python generate.py --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path "${MODEL_PATH}" --temperature 0 > ./output_eager
        cat ./output_eager
        python generate.py --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path "${MODEL_PATH}" --temperature 0 > ./output_eager
        cat ./output_eager

        echo "************************************************************"
        echo "*** linear int4"
        echo "************************************************************"
        # Only this run sets PYTORCH_ENABLE_MPS_FALLBACK=1.
        # NOTE(review): presumably the int4 path uses ops without an MPS
        # implementation - confirm.
        PYTORCH_ENABLE_MPS_FALLBACK=1 python generate.py --device mps --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "${MODEL_PATH}" --temperature 0 > ./output_eager
        cat ./output_eager