Remove chatglm_C Module to Eliminate LGPL Dependency (intel-analytics…#11178)

* remove chatglm_C.**.pyd to resolve the ngsolve weak-copyleft (LGPL) license issue

* fix style check error

* remove chatglm native int4 from langchain
liu-shaojun authored May 31, 2024
1 parent 50b5f44 commit 401013a
Showing 14 changed files with 19 additions and 690 deletions.
101 changes: 0 additions & 101 deletions .github/workflows/llm-binary-build.yml
@@ -72,12 +72,6 @@ jobs:
export http_proxy=${HTTP_PROXY}
export https_proxy=${HTTPS_PROXY}
yum install -y gcc-toolset-11 cmake git
conda remove -n python39 --all -y
conda create -n python39 python=3.9 -y
conda remove -n python310 --all -y
conda create -n python310 python=3.10 -y
conda remove -n python311 --all -y
conda create -n python311 python=3.11 -y
- uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
with:
repository: "intel-analytics/llm.cpp"
@@ -109,42 +103,6 @@ jobs:
mv build/libstarcoder-api.so release/libstarcoder-api.so
mv build/quantize-starcoder release/quantize-starcoder
mv build/libstarcoder.so release/libstarcoder_avxvnni.so
- name: Build Chatglm
shell: bash
run: |
source activate python39 || conda activate python39
cd src/chatglm
scl enable gcc-toolset-11 "cmake -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
- name: Move Chatglm binaries
shell: bash
run: |
mv src/chatglm/build/main release/main-chatglm_vnni
mv src/chatglm/build/_C.cpython-39-x86_64-linux-gnu.so release/chatglm_C.cpython-39-x86_64-linux-gnu.so
- name: Build Chatglm Py310
shell: bash
run: |
source activate python310 || conda activate python310
cd src/chatglm
rm -r build
scl enable gcc-toolset-11 "cmake -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
- name: Move Chatglm binaries Py310
shell: bash
run: |
mv src/chatglm/build/_C.cpython-310-x86_64-linux-gnu.so release/chatglm_C.cpython-310-x86_64-linux-gnu.so
- name: Build Chatglm Py311
shell: bash
run: |
source activate python311 || conda activate python311
cd src/chatglm
rm -r build
scl enable gcc-toolset-11 "cmake -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
- name: Move Chatglm binaries Py311
shell: bash
run: |
mv src/chatglm/build/_C.cpython-311-x86_64-linux-gnu.so release/chatglm_C.cpython-311-x86_64-linux-gnu.so
- name: Archive build files
uses: actions/upload-artifact@v3
with:
@@ -155,9 +113,6 @@ jobs:
shell: bash
run: |
make clean
conda remove -n python39 --all -y
conda remove -n python310 --all -y
conda remove -n python311 --all -y
check-linux-avx512-artifact:
if: ${{contains(inputs.platform, 'Linux')}}
@@ -286,8 +241,6 @@ jobs:
export http_proxy=${HTTP_PROXY}
export https_proxy=${HTTPS_PROXY}
yum install -y gcc-toolset-11 cmake git
conda remove -n python39 --all -y
conda create -n python39 python=3.9 -y
- uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
with:
repository: "intel-analytics/llm.cpp"
@@ -299,11 +252,6 @@ jobs:
run: |
scl enable gcc-toolset-11 "cmake -DONLYAVX=OFF -DONLYAVX2=OFF -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
# build chatglm
source activate python39 || conda activate python39
cd src/chatglm
scl enable gcc-toolset-11 "cmake -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
- name: Move amx release binary
shell: bash
run: |
@@ -316,9 +264,6 @@ jobs:
mv build/libgptneox.so amx_release/libgptneox_amx.so
mv build/quantize-starcoder amx_release/quantize-starcoder_amx
mv build/libstarcoder.so amx_release/libstarcoder_amx.so
# chatglm binary files
mv src/chatglm/build/main amx_release/main-chatglm_amx
# mv src/chatglm/build/_C.cpython-39-x86_64-linux-gnu.so amx_release/chatglm_C.cpython-39-x86_64-linux-gnu.so
- name: Archive amx build files
uses: actions/upload-artifact@v3
with:
@@ -329,7 +274,6 @@ jobs:
shell: bash
run: |
make clean
conda remove -n python39 --all -y
check-windows-avx2-artifact:
if: ${{contains(inputs.platform, 'Windows')}}
@@ -393,10 +337,6 @@ jobs:
needs: check-windows-avx-vnni-artifact
if: needs.check-windows-avx-vnni-artifact.outputs.if-exists == 'false'
steps:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.9"
- name: Set access token
run: |
echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $env:GITHUB_ENV
@@ -438,47 +378,6 @@ jobs:
# mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
- name: Build Chatglm
shell: powershell
run: |
cd src/chatglm
cmake -DAVXVNNI=ON -B build
cmake --build build --config Release -j
- name: Move Chatglm binaries
shell: powershell
run: |
mv src/chatglm/build/Release/main.exe release/main-chatglm_vnni.exe
mv src/chatglm/build/Release/_C.cp39-win_amd64.pyd release/chatglm_C.cp39-win_amd64.pyd
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Build Chatglm Py310
shell: powershell
run: |
cd src/chatglm
rm -r build
cmake -DAVXVNNI=ON -B build
cmake --build build --config Release -j
- name: Move Chatglm binaries Py310
shell: powershell
run: |
mv src/chatglm/build/Release/_C.cp310-win_amd64.pyd release/chatglm_C.cp310-win_amd64.pyd
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Build Chatglm Py311
shell: powershell
run: |
cd src/chatglm
rm -r build
cmake -DAVXVNNI=ON -B build
cmake --build build --config Release -j
- name: Move Chatglm binaries Py311
shell: powershell
run: |
mv src/chatglm/build/Release/_C.cp311-win_amd64.pyd release/chatglm_C.cp311-win_amd64.pyd
- name: Archive build files
uses: actions/upload-artifact@v3
with:
@@ -31,7 +31,7 @@ You may also convert Hugging Face *Transformers* models into native INT4 format,
```eval_rst
.. note::
* Currently only llama/bloom/gptneox/starcoder/chatglm model families are supported; for other models, you may use the Hugging Face ``transformers`` INT4 format as described `above <./langchain_api.html#using-hugging-face-transformers-int4-format>`_.
* Currently only llama/bloom/gptneox/starcoder model families are supported; for other models, you may use the Hugging Face ``transformers`` INT4 format as described `above <./langchain_api.html#using-hugging-face-transformers-int4-format>`_.
* You may choose the corresponding API developed for specific native models to load the converted model.
```
@@ -41,9 +41,9 @@ from ipex_llm.langchain.llms import LlamaLLM
from ipex_llm.langchain.embeddings import LlamaEmbeddings
from langchain.chains.question_answering import load_qa_chain

# switch to ChatGLMEmbeddings/GptneoxEmbeddings/BloomEmbeddings/StarcoderEmbeddings to load other models
# switch to GptneoxEmbeddings/BloomEmbeddings/StarcoderEmbeddings to load other models
embeddings = LlamaEmbeddings(model_path='/path/to/converted/model.bin')
# switch to ChatGLMLLM/GptneoxLLM/BloomLLM/StarcoderLLM to load other models
# switch to GptneoxLLM/BloomLLM/StarcoderLLM to load other models
ipex_llm = LlamaLLM(model_path='/path/to/converted/model.bin')

doc_chain = load_qa_chain(ipex_llm, ...)
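For reference, the native INT4 LangChain flow documented above now covers only the llama/bloom/gptneox/starcoder families. A minimal end-to-end sketch of that flow follows; the model path, the `chain_type` argument, the sample document, and the question are illustrative placeholders rather than values from this diff.

```python
from ipex_llm.langchain.llms import LlamaLLM
from ipex_llm.langchain.embeddings import LlamaEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.schema import Document

# Path to a model already converted to the native INT4 (ggml) format.
model_path = '/path/to/converted/model.bin'

# Switch to the Gptneox*/Bloom*/Starcoder* classes for the other supported families.
embeddings = LlamaEmbeddings(model_path=model_path)
ipex_llm = LlamaLLM(model_path=model_path)

# Minimal question-answering chain over a single in-memory document.
doc_chain = load_qa_chain(ipex_llm, chain_type="stuff")
docs = [Document(page_content="IPEX-LLM accelerates local LLM inference on Intel hardware.")]
print(doc_chain.run(input_documents=docs, question="What does IPEX-LLM do?"))
```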
14 changes: 1 addition & 13 deletions docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst
@@ -31,7 +31,7 @@ IPEX-LLM provides ``TransformersLLM`` and ``TransformersPipelineLLM``, which imp
Native Model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For ``llama``/``chatglm``/``bloom``/``gptneox``/``starcoder`` model families, you could also use the following LLM wrappers with the native (cpp) implementation for maximum performance.
For ``llama``/``bloom``/``gptneox``/``starcoder`` model families, you could also use the following LLM wrappers with the native (cpp) implementation for maximum performance.

.. tabs::

@@ -47,18 +47,6 @@ For ``llama``/``chatglm``/``bloom``/``gptneox``/``starcoder`` model families, yo
.. automethod:: stream
.. automethod:: get_num_tokens

.. tab:: ChatGLM

.. autoclass:: ipex_llm.langchain.llms.ChatGLMLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model, ggml_module, client, model_path, kwargs

.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens

.. tab:: Bloom

.. autoclass:: ipex_llm.langchain.llms.BloomLLM
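The wrappers referenced in this file expose the standard LangChain LLM interface in addition to the `stream` and `get_num_tokens` methods listed above. A brief usage sketch, assuming the usual LangChain call pattern; the model path and prompt are placeholders.

```python
from ipex_llm.langchain.llms import LlamaLLM  # or GptneoxLLM / BloomLLM / StarcoderLLM

llm = LlamaLLM(model_path='/path/to/converted/model.bin')  # placeholder path

prompt = "Q: What is a CPU? A:"
print("prompt tokens:", llm.get_num_tokens(prompt))  # token count helper listed in the docs above
print(llm(prompt))  # plain completion through the standard LangChain __call__ interface

# A `stream` method is also documented for incremental generation; its chunk
# format is not shown in this diff, so it is omitted from this sketch.
```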
9 changes: 4 additions & 5 deletions python/llm/example/CPU/Native-Models/native_int4_pipeline.py
@@ -36,8 +36,7 @@ def load(model_path, model_family, n_threads):
"llama": LlamaForCausalLM,
"gptneox": GptneoxForCausalLM,
"bloom": BloomForCausalLM,
"starcoder": StarcoderForCausalLM,
"chatglm": ChatGLMForCausalLM
"starcoder": StarcoderForCausalLM
}

if model_family in model_family_to_class:
@@ -55,7 +54,7 @@ def load(model_path, model_family, n_threads):

def inference(llm, repo_id_or_model_path, model_family, prompt):

if model_family in ['llama', 'gptneox', 'bloom', 'starcoder', 'chatglm']:
if model_family in ['llama', 'gptneox', 'bloom', 'starcoder']:
# ------ Option 1: Use IPEX-LLM based tokenizer
print('-'*20, ' IPEX-LLM based tokenizer ', '-'*20)
st = time.time()
@@ -109,9 +108,9 @@ def main():
parser.add_argument('--thread-num', type=int, default=2, required=True,
help='Number of threads to use for inference')
parser.add_argument('--model-family', type=str, default='llama', required=True,
choices=["llama", "llama2", "bloom", "gptneox", "starcoder", "chatglm"],
choices=["llama", "llama2", "bloom", "gptneox", "starcoder"],
help="The model family of the large language model (supported option: 'llama', 'llama2', "
"'gptneox', 'bloom', 'starcoder', 'chatglm')")
"'gptneox', 'bloom', 'starcoder')")
parser.add_argument('--repo-id-or-model-path', type=str, required=True,
help='The path to the huggingface checkpoint folder')
parser.add_argument('--prompt', type=str, default='Once upon a time, there existed a little girl who liked to have adventures. ',
15 changes: 2 additions & 13 deletions python/llm/setup.py
@@ -86,12 +86,7 @@
"quantize-llama_vnni.exe",
"quantize-gptneox_vnni.exe",
"quantize-bloom_vnni.exe",
"quantize-starcoder_vnni.exe",

"main-chatglm_vnni.exe",
"chatglm_C.cp39-win_amd64.pyd",
"chatglm_C.cp310-win_amd64.pyd",
"chatglm_C.cp311-win_amd64.pyd"
"quantize-starcoder_vnni.exe"
]
linux_binarys = [
"libllama_avx.so",
@@ -125,13 +120,7 @@
"main-llama",
"main-gptneox",
"main-bloom",
"main-starcoder",

"main-chatglm_vnni",
"main-chatglm_amx",
"chatglm_C.cpython-39-x86_64-linux-gnu.so",
"chatglm_C.cpython-310-x86_64-linux-gnu.so",
"chatglm_C.cpython-311-x86_64-linux-gnu.so"
"main-starcoder"
]

ext_lib_urls = [
4 changes: 0 additions & 4 deletions python/llm/src/ipex_llm/ggml/convert.py
@@ -76,10 +76,6 @@ def _convert_starcoder(model_path, outfile_dir, outtype):
_convert_starcoder_hf_to_ggml(model_path, outfile_dir, outtype)


def _convert_chatglm(model_path, outfile_dir, outtype):
return _convert_chatglm_hf_to_ggml(model_path, outfile_dir, outtype)


def _convert_to_ggml(model_path: str, outfile_dir: str,
model_family: str = 'llama', outtype: str="fp16"):
"""
12 changes: 3 additions & 9 deletions python/llm/src/ipex_llm/ggml/convert_model.py
@@ -16,7 +16,7 @@
import os
import time
from pathlib import Path
from ipex_llm.ggml.convert import _convert_to_ggml, _convert_chatglm
from ipex_llm.ggml.convert import _convert_to_ggml
from ipex_llm.ggml.quantize import quantize
from ipex_llm.utils.common import invalidInputError
import argparse
@@ -54,9 +54,9 @@ def convert_model(input_path: str,
# make sure directory exists
os.makedirs(output_path, exist_ok=True)
# check input value
invalidInputError(model_family in ['llama', 'bloom', 'gptneox', 'starcoder', 'chatglm'],
invalidInputError(model_family in ['llama', 'bloom', 'gptneox', 'starcoder'],
"Now we only support quantization of model \
family('llama', 'bloom', 'gptneox', 'starcoder', 'chatglm')",
family('llama', 'bloom', 'gptneox', 'starcoder')",
"{} is not in the list.".format(model_family))
invalidInputError(os.path.isdir(output_path),
"The output_path {} was not a directory".format(output_path))
@@ -78,12 +78,6 @@
family('llama', 'gptneox', 'starcoder')",
"{} is not in the list.".format(model_family))

# chatglm merges convertion and quantization into one operation.
if model_family == 'chatglm':
return _convert_chatglm(model_path=input_path,
outfile_dir=output_path,
outtype=dtype)

if tmp_path is not None:
model_name = Path(input_path).stem
tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
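With the chatglm branch removed, `convert_model` accepts only the four model families checked above. A hedged sketch of a call; the paths are placeholders and the `dtype='int4'` value is an assumption about the quantization target, not something shown in this diff.

```python
from ipex_llm.ggml.convert_model import convert_model

# Convert a Hugging Face checkpoint of a supported family to the native ggml format.
convert_model(
    input_path='/path/to/llama/hf-checkpoint',  # placeholder checkpoint folder
    output_path='/path/to/output/dir',          # created if missing (os.makedirs above)
    model_family='llama',                       # 'llama', 'bloom', 'gptneox', or 'starcoder'
    dtype='int4',                               # assumed quantization dtype
)
```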
22 changes: 0 additions & 22 deletions python/llm/src/ipex_llm/ggml/model/chatglm/__init__.py

This file was deleted.
