From 96ef128a97890db43626741576ae258766d00da3 Mon Sep 17 00:00:00 2001
From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com>
Date: Sun, 12 May 2024 17:37:49 -0700
Subject: [PATCH] Run gguf (#686)

* improve updown parser, and use in README.md execution
* cut/paste errors
* typo: true -> false
* we scan each partial line, so need to suppress at partial line level :(
* make it twice as nice
* improved updown parsing
* special handling for lines w/o option
* enable run on quantization doc
* handle white space before triple backtick
* updates
* run gguf
* updates
* add gguf to periodic
* build et for gguf
* update updown options to handle llama3-8b on macos
* secrets
* updates
---
 .github/workflows/run-readme-periodic.yml | 37 +++++++++++--
 .github/workflows/run-readme-pr-macos.yml | 67 ++++++++++++++++++++---
 .github/workflows/run-readme-pr-mps.yml   |  2 +
 .github/workflows/run-readme-pr.yml       | 34 ++++++++++++
 docs/GGUF.md                              | 19 ++++++-
 scripts/updown.py                         | 15 ++++-
 6 files changed, 157 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/run-readme-periodic.yml b/.github/workflows/run-readme-periodic.yml
index bf789e204..155d0795e 100644
--- a/.github/workflows/run-readme-periodic.yml
+++ b/.github/workflows/run-readme-periodic.yml
@@ -42,10 +42,6 @@ jobs:
         bash -x ./run-readme.sh
         echo "::endgroup::"

-        echo "::group::Completion"
-        echo "tests complete"
-        echo "*******************************************"
-        echo "::endgroup::"

   test-quantization-any:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -79,6 +75,39 @@ jobs:
         bash -x ./run-quantization.sh
         echo "::endgroup::"

+  test-gguf-any:
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    secrets: inherit
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      secrets-env: "HF_TOKEN_PERIODIC"
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.1"
+      timeout: 60
+      script: |
+        echo "::group::Print machine info"
+        uname -a
+        echo "::endgroup::"
+
+        echo "::group::Install newer objcopy that supports --set-section-alignment"
+        yum install -y devtoolset-10-binutils
+        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
+        echo "::endgroup::"
+
+        echo "::group::Create script to run gguf"
+        python3 scripts/updown.py --file docs/GGUF.md > ./run-gguf.sh
+        # for good measure, if something happened to updown processor,
+        # and it did not error out, fail with an exit 1
+        echo "exit 1" >> ./run-gguf.sh
+        echo "::endgroup::"
+
+        echo "::group::Run gguf"
+        echo "*******************************************"
+        cat ./run-gguf.sh
+        echo "*******************************************"
+        bash -x ./run-gguf.sh
+        echo "::endgroup::"
+
         echo "::group::Completion"
         echo "tests complete"
         echo "*******************************************"

diff --git a/.github/workflows/run-readme-pr-macos.yml b/.github/workflows/run-readme-pr-macos.yml
index ab5091e25..e8a040b4e 100644
--- a/.github/workflows/run-readme-pr-macos.yml
+++ b/.github/workflows/run-readme-pr-macos.yml
@@ -7,7 +7,7 @@ on:
   workflow_dispatch:
 jobs:
   test-readme-macos:
-    runs-on: macos-14-xlarge
+    runs-on: macos-14-xlarge
     steps:
       - name: Checkout code
         uses: actions/checkout@v2
@@ -34,7 +34,7 @@
          echo "::endgroup::"

          echo "::group::Create script to run README"
-          python3 scripts/updown.py --file README.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-readme.sh
+          python3 scripts/updown.py --file README.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-readme.sh
          # for good measure, if something happened to updown processor,
          # and it did not error out, fail with an exit 1
          echo "exit 1" >> ./run-readme.sh
@@ -47,12 +47,7 @@
          bash -x ./run-readme.sh
          echo "::endgroup::"

-          echo "::group::Completion"
-          echo "tests complete"
-          echo "*******************************************"
-          echo "::endgroup::"
-
-
+
   test-quantization-macos:
     runs-on: macos-14-xlarge
     steps:
@@ -81,7 +76,7 @@
          echo "::endgroup::"

          echo "::group::Create script to run quantization"
-          python3 scripts/updown.py --file docs/quantization.md --replace llama3:stories15M --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh
+          python3 scripts/updown.py --file docs/quantization.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh
          # for good measure, if something happened to updown processor,
          # and it did not error out, fail with an exit 1
          echo "exit 1" >> ./run-quantization.sh
@@ -98,3 +93,57 @@
          echo "tests complete"
          echo "*******************************************"
          echo "::endgroup::"
+
+
+  test-gguf-macos:
+    runs-on: macos-14-xlarge
+    secrets: inherit
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10.11'
+      - name: Setup Xcode
+        if: runner.os == 'macOS'
+        uses: maxim-lobanov/setup-xcode@v1
+        with:
+          xcode-version: '15.3'
+      - name: Run script
+        secrets-env: "HF_TOKEN_PERIODIC"
+        run: |
+          set -x
+          # NS: Remove previous installation of torch first
+          # as this script does not install anything into conda env but rather as system dep
+          pip3 uninstall -y torch || true
+          set -eou pipefail
+
+          echo "::group::Print machine info"
+          uname -a
+          sysctl machdep.cpu.brand_string
+          sysctl machdep.cpu.core_count
+          echo "::endgroup::"
+
+          # echo "::group::Install newer objcopy that supports --set-section-alignment"
+          # yum install -y devtoolset-10-binutils
+          # export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
+          # echo "::endgroup::"
+
+          echo "::group::Create script to run gguf"
+          python3 scripts/updown.py --file docs/GGUF.md > ./run-gguf.sh
+          # for good measure, if something happened to updown processor,
+          # and it did not error out, fail with an exit 1
+          echo "exit 1" >> ./run-gguf.sh
+          echo "::endgroup::"
+
+          echo "::group::Run gguf"
+          echo "*******************************************"
+          cat ./run-gguf.sh
+          echo "*******************************************"
+          bash -x ./run-gguf.sh
+          echo "::endgroup::"
+
+          echo "::group::Completion"
+          echo "tests complete"
+          echo "*******************************************"
+          echo "::endgroup::"

diff --git a/.github/workflows/run-readme-pr-mps.yml b/.github/workflows/run-readme-pr-mps.yml
index 4b262b89a..e4b502153 100644
--- a/.github/workflows/run-readme-pr-mps.yml
+++ b/.github/workflows/run-readme-pr-mps.yml
@@ -8,8 +8,10 @@
 jobs:
   test-readme-mps-macos:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    secrets: inherit
     with:
       runner: macos-m1-14
+      secrets-env: "HF_TOKEN_PERIODIC"
       script: |
         conda create -y -n test-readme-mps-macos python=3.10.11
         conda activate test-readme-mps-macos

diff --git a/.github/workflows/run-readme-pr.yml b/.github/workflows/run-readme-pr.yml
index 21fb9a1e9..9aab3c74a 100644
--- a/.github/workflows/run-readme-pr.yml
+++ b/.github/workflows/run-readme-pr.yml
@@ -10,6 +10,7 @@
 jobs:
   test-readme-any:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    secrets: inherit
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
       secrets-env: "HF_TOKEN_PERIODIC"
@@ -76,6 +77,39 @@
         bash -x ./run-quantization.sh
         echo "::endgroup::"

+  test-gguf-any:
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    secrets: inherit
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      secrets-env: "HF_TOKEN_PERIODIC"
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.1"
+      timeout: 60
+      script: |
+        echo "::group::Print machine info"
+        uname -a
+        echo "::endgroup::"
+
+        echo "::group::Install newer objcopy that supports --set-section-alignment"
+        yum install -y devtoolset-10-binutils
+        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
+        echo "::endgroup::"
+
+        echo "::group::Create script to run gguf"
+        python3 scripts/updown.py --file docs/GGUF.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-gguf.sh
+        # for good measure, if something happened to updown processor,
+        # and it did not error out, fail with an exit 1
+        echo "exit 1" >> ./run-gguf.sh
+        echo "::endgroup::"
+
+        echo "::group::Run gguf"
+        echo "*******************************************"
+        cat ./run-gguf.sh
+        echo "*******************************************"
+        bash -x ./run-gguf.sh
+        echo "::endgroup::"
+
         echo "::group::Completion"
         echo "tests complete"
         echo "*******************************************"

diff --git a/docs/GGUF.md b/docs/GGUF.md
index 7211c5998..20f6d3425 100644
--- a/docs/GGUF.md
+++ b/docs/GGUF.md
@@ -1,16 +1,27 @@
 # Using GGUF Models
-We support parsing [GGUF](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) files with the following tensor types:
+
+[shell default]: HF_TOKEN="${SECRET_HF_TOKEN_PERIODIC}" huggingface-cli login
+
+[shell default]: TORCHCHAT_ROOT=${PWD} ./scripts/install_et.sh
+
+We support parsing [GGUF](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) files with
+the following tensor types:
 - F16
 - F32
 - Q4_0
 - Q6_K

-If an unsupported type is encountered while parsing a GGUF file, an exception is raised.
+If an unsupported type is encountered while parsing a GGUF file, an
+exception is raised.

 We now go over an example of using GGUF files in the torchchat flow.

 ### Download resources
-First download a GGUF model and tokenizer. In this example, we use a Q4_0 GGUF file. (Note that Q4_0 is only the dominant tensor type in the file, but the file also contains GGUF tensors of types Q6_K, F16, and F32.)
+
+First download a GGUF model and tokenizer. In this example, we use a
+Q4_0 GGUF file. (Note that Q4_0 is only the dominant tensor type in
+the file, but the file also contains GGUF tensors of types Q6_K, F16,
+and F32.)

 ```
 # Download resources
@@ -55,3 +66,5 @@ python3 torchchat.py export --gguf-path ${GGUF_MODEL_PATH} --output-pte-path ${G
 # Generate using the PTE model that was created by the export command
 python3 torchchat.py generate --gguf-path ${GGUF_MODEL_PATH} --pte-path ${GGUF_PTE_PATH} --tokenizer-path ${GGUF_TOKENIZER_PATH} --temperature 0 --prompt "Once upon a time" --max-new-tokens 15
 ```
+
+[end default]: end

diff --git a/scripts/updown.py b/scripts/updown.py
index b59ff738a..8906c63d6 100644
--- a/scripts/updown.py
+++ b/scripts/updown.py
@@ -140,7 +140,7 @@ def process_command(
         )
     elif keyword == "prefix":
         output(
-            trailing_command[:-1],
+            trailing_command,
             end="",
             replace_list=replace_list,
             suppress_list=suppress_list,
@@ -178,6 +178,19 @@ def process_command(
             suppress_list=suppress_list,
         )
         exit(0)
+    elif keyword == "comment":
+        output(
+            "# " + trailing_command,
+            suppress_list=None,
+            replace_list=None,
+        )
+    else:
+        output(
+            "echo 'unknown updown command'\nexit 1",
+            suppress_list=None,
+            replace_list=None,
+        )
+        exit(1)

     # We have processed this line as a command
     return True
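
A note on the mechanism these workflows share: `scripts/updown.py` turns an annotated markdown document (README.md, docs/quantization.md, docs/GGUF.md) into a shell script, so the documentation itself is what CI executes. The `scripts/updown.py` hunk above adds two behaviors: a `comment` keyword that passes annotation text through as a shell comment, and a catch-all branch that poisons the generated script and aborts when an unrecognized keyword is met. The sketch below is a minimal, hypothetical reduction of that dispatch logic for illustration only; the simplified `output()` helper and single-argument command format are assumptions, and the real processor also threads the `--replace` and `--suppress` lists through `output()` and supports further keywords and options.

```python
# Hypothetical sketch of updown-style command dispatch (not the real
# scripts/updown.py, which handles options, replace/suppress lists, etc.).
import sys


def output(text: str, end: str = "\n") -> None:
    # Emit one piece of the generated shell script.
    sys.stdout.write(text + end)


def process_command(keyword: str, trailing_command: str) -> bool:
    if keyword == "prefix":
        # Emit text without a newline so the next emission continues
        # the same shell-script line.
        output(trailing_command, end="")
    elif keyword == "shell":
        # A "[shell default]: cmd" annotation becomes a command in the
        # generated script, e.g. the huggingface-cli login in docs/GGUF.md.
        output(trailing_command)
    elif keyword == "comment":
        # New in this patch: annotation text becomes a shell comment.
        output("# " + trailing_command)
    else:
        # New in this patch: an unknown keyword writes a poison pill into
        # the generated script and fails the processor, so CI errors out
        # instead of silently skipping the command.
        output("echo 'unknown updown command'\nexit 1")
        sys.exit(1)
    # The line was consumed as a command rather than ordinary markdown.
    return True


if __name__ == "__main__":
    # These calls emit a three-line shell script.
    process_command("comment", "log in, then exercise the CLI")
    process_command("shell", "huggingface-cli login")
    process_command("prefix", "python3 ")
    process_command("shell", "torchchat.py --help")
```

The workflows layer a second safeguard on top of this: every job appends an unconditional `echo "exit 1"` to the generated script, so a script that was silently truncated before the `[end default]: end` marker still fails the run.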