LLM: arc perf test for some popular models
WeiguangHan committed Oct 19, 2023
1 parent 905cdd3 commit 7a51e8c
Showing 7 changed files with 26 additions and 5,272 deletions.
52 changes: 9 additions & 43 deletions .github/workflows/llm_performance_tests.yml
@@ -88,25 +88,20 @@ jobs:
      THREAD_NUM: 16
      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
    steps:
      - name: Set environment variables
        shell: bash
        run: |
          echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
          echo "LLAMA2_13B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-13b-chat-hf" >> "$GITHUB_ENV"
          echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV"
          echo "WHISPER_MEDIUM_ORIGIN_PATH=${ORIGIN_DIR}/whisper-medium" >> "$GITHUB_ENV"
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        shell: bash
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade setuptools
          python -m pip install --upgrade wheel
          python -m pip install --upgrade omegaconf
          python -m pip install --upgrade pandas
          python -m pip install --upgrade einops
      - name: Download llm binary
        uses: ./.github/actions/llm/download-llm-binary
@@ -122,44 +117,15 @@ jobs:
          source /opt/intel/oneapi/setvars.sh
          bash python/llm/test/run-llm-install-tests.sh
      - name: Download LLMs
        shell: bash
        run: |
          if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then
            echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
          fi
          if [ ! -d $LLAMA2_13B_ORIGIN_PATH ]; then
            echo "Directory $LLAMA2_13B_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-13b-chat-hf -P $ORIGIN_DIR
          fi
          if [ ! -d $CHATGLM2_6B_ORIGIN_PATH ]; then
            echo "Directory $CHATGLM2_6B_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR
          fi
          if [ ! -d $WHISPER_MEDIUM_ORIGIN_PATH ]; then
            echo "Directory $WHISPER_MEDIUM_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/whisper-medium -P $ORIGIN_DIR
          fi
      - name: Test on xpu
        shell: bash
        run: |
          source /opt/intel/oneapi/setvars.sh
          export USE_XETLA=OFF
          export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
          cd python/llm/test/benchmark/gpu
          mv python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
          cd python/llm/dev/benchmark/all-in-one
          export http_proxy=${HTTP_PROXY}
          export https_proxy=${HTTPS_PROXY}
          rm -rf test-result || true
          mkdir test-result
          taskset -c 0-$((THREAD_NUM - 1)) python llama2.py --model-dir="${LLAMA2_7B_ORIGIN_PATH}" --input-tokens=32 --max-new-tokens=32 > test-result/llama2_7b-32-32.log
          taskset -c 0-$((THREAD_NUM - 1)) python llama2.py --model-dir="${LLAMA2_7B_ORIGIN_PATH}" --input-tokens=1024 --max-new-tokens=1024 > test-result/llama2_7b-1024-1024.log
          taskset -c 0-$((THREAD_NUM - 1)) python llama2.py --model-dir="${LLAMA2_13B_ORIGIN_PATH}" --input-tokens=32 --max-new-tokens=32 > test-result/llama2_13b-32-32.log
          taskset -c 0-$((THREAD_NUM - 1)) python llama2.py --model-dir="${LLAMA2_13B_ORIGIN_PATH}" --input-tokens=1024 --max-new-tokens=1024 > test-result/llama2_13b-1024-1024.log
          taskset -c 0-$((THREAD_NUM - 1)) python chatglm2.py --model-dir="${CHATGLM2_6B_ORIGIN_PATH}" --input-tokens=32 --max-new-tokens=32 > test-result/chatglm2_6b-32-32.log
          taskset -c 0-$((THREAD_NUM - 1)) python chatglm2.py --model-dir="${CHATGLM2_6B_ORIGIN_PATH}" --input-tokens=1024 --max-new-tokens=1024 > test-result/chatglm2_6b-1024-1024.log
          taskset -c 0-$((THREAD_NUM - 1)) python whisper.py --model-dir="${WHISPER_MEDIUM_ORIGIN_PATH}" > test-result/whisper_medium-default-default.log
          python ../analyze_log_dir.py --log-dir=./test-result --output-path=./xpu_latency.csv
          timestamp=`date '+%Y%m%d'`
          curl -T ./xpu_latency.csv ${LLM_FTP_URL}/llm/ggml-actions/perf/xpu_lantency_$timestamp.csv
          taskset -c 0-$((THREAD_NUM - 1)) python run.py
          curl -T ./*.csv ${LLM_FTP_URL}/llm/ggml-actions/perf/
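
Note on the change: the rewritten "Test on xpu" step drops the per-model taskset invocations and the analyze_log_dir.py post-processing in favor of a single config-driven entry point, run.py, followed by a wildcard CSV upload. Below is a minimal sketch of what such a driver can look like, assuming only the config.yaml moved into place above and the omegaconf/pandas packages installed earlier; benchmark_one, the CSV name, and its columns are illustrative placeholders, not the actual run.py API.

# Minimal sketch of a config-driven benchmark driver in the style of run.py.
# benchmark_one and the CSV layout are illustrative placeholders.
import time

import pandas as pd
from omegaconf import OmegaConf


def benchmark_one(repo_id: str, in_tokens: int, out_tokens: int, num_beams: int) -> float:
    """Placeholder: load the model, generate, and return latency in seconds."""
    start = time.perf_counter()
    # Model load + generation would happen here.
    return time.perf_counter() - start


def main() -> None:
    conf = OmegaConf.load("config.yaml")
    rows = []
    for repo_id in conf.repo_id:
        for pair in conf.in_out_pairs:
            in_tokens, out_tokens = (int(x) for x in pair.split("-"))
            # Warm-up iterations run but are not recorded.
            for _ in range(conf.warm_up):
                benchmark_one(repo_id, in_tokens, out_tokens, conf.num_beams)
            trials = [
                benchmark_one(repo_id, in_tokens, out_tokens, conf.num_beams)
                for _ in range(conf.num_trials)
            ]
            rows.append({"model": repo_id, "in_out": pair,
                         "mean_latency_s": sum(trials) / len(trials)})
    # Writing results as *.csv is what lets the workflow upload with
    # `curl -T ./*.csv` instead of hard-coding a file name as before.
    pd.DataFrame(rows).to_csv("arc_perf.csv", index=False)


if __name__ == "__main__":
    main()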
64 changes: 0 additions & 64 deletions python/llm/test/benchmark/analyze_log_dir.py

This file was deleted.

17 changes: 17 additions & 0 deletions python/llm/test/benchmark/arc-perf-test.yaml
@@ -0,0 +1,17 @@
repo_id:
  - 'THUDM/chatglm2-6b'
  - 'meta-llama/Llama-2-7b-chat-hf'
  - 'meta-llama/Llama-2-13b-chat-hf'
  - 'tiiuae/falcon-7b-instruct-with-patch'
  - 'mosaicml/mpt-7b-chat'
local_model_hub: '/mnt/disk1/models'
warm_up: 1
num_trials: 3
num_beams: 1 # default to greedy search
in_out_pairs:
  - '32-32'
  - '1024-128'
  - '2048-256'
test_api:
  - "transformer_int4_gpu" # on Intel GPU
