Remove chatglm_C Module to Eliminate LGPL Dependency (intel-analytics…#11178)

* remove chatglm_C.**.pyd to resolve the ngsolve weak-copyleft (LGPL) license issue

* fix style check error

* remove chatglm native int4 from langchain
liu-shaojun authored May 31, 2024
1 parent 50b5f44 commit 401013a
Showing 14 changed files with 19 additions and 690 deletions.
101 changes: 0 additions & 101 deletions .github/workflows/llm-binary-build.yml
@@ -72,12 +72,6 @@ jobs:
export http_proxy=${HTTP_PROXY}
export https_proxy=${HTTPS_PROXY}
yum install -y gcc-toolset-11 cmake git
conda remove -n python39 --all -y
conda create -n python39 python=3.9 -y
conda remove -n python310 --all -y
conda create -n python310 python=3.10 -y
conda remove -n python311 --all -y
conda create -n python311 python=3.11 -y
- uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
with:
repository: "intel-analytics/llm.cpp"
@@ -109,42 +103,6 @@ jobs:
mv build/libstarcoder-api.so release/libstarcoder-api.so
mv build/quantize-starcoder release/quantize-starcoder
mv build/libstarcoder.so release/libstarcoder_avxvnni.so
- name: Build Chatglm
shell: bash
run: |
source activate python39 || conda activate python39
cd src/chatglm
scl enable gcc-toolset-11 "cmake -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
- name: Move Chatglm binaries
shell: bash
run: |
mv src/chatglm/build/main release/main-chatglm_vnni
mv src/chatglm/build/_C.cpython-39-x86_64-linux-gnu.so release/chatglm_C.cpython-39-x86_64-linux-gnu.so
- name: Build Chatglm Py310
shell: bash
run: |
source activate python310 || conda activate python310
cd src/chatglm
rm -r build
scl enable gcc-toolset-11 "cmake -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
- name: Move Chatglm binaries Py310
shell: bash
run: |
mv src/chatglm/build/_C.cpython-310-x86_64-linux-gnu.so release/chatglm_C.cpython-310-x86_64-linux-gnu.so
- name: Build Chatglm Py311
shell: bash
run: |
source activate python311 || conda activate python311
cd src/chatglm
rm -r build
scl enable gcc-toolset-11 "cmake -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
- name: Move Chatglm binaries Py311
shell: bash
run: |
mv src/chatglm/build/_C.cpython-311-x86_64-linux-gnu.so release/chatglm_C.cpython-311-x86_64-linux-gnu.so
- name: Archive build files
uses: actions/upload-artifact@v3
with:
@@ -155,9 +113,6 @@ jobs:
shell: bash
run: |
make clean
conda remove -n python39 --all -y
conda remove -n python310 --all -y
conda remove -n python311 --all -y
check-linux-avx512-artifact:
if: ${{contains(inputs.platform, 'Linux')}}
@@ -286,8 +241,6 @@ jobs:
export http_proxy=${HTTP_PROXY}
export https_proxy=${HTTPS_PROXY}
yum install -y gcc-toolset-11 cmake git
conda remove -n python39 --all -y
conda create -n python39 python=3.9 -y
- uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
with:
repository: "intel-analytics/llm.cpp"
@@ -299,11 +252,6 @@ jobs:
run: |
scl enable gcc-toolset-11 "cmake -DONLYAVX=OFF -DONLYAVX2=OFF -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
# build chatglm
source activate python39 || conda activate python39
cd src/chatglm
scl enable gcc-toolset-11 "cmake -B build"
scl enable gcc-toolset-11 "cmake --build build --config Release -j"
- name: Move amx release binary
shell: bash
run: |
@@ -316,9 +264,6 @@ jobs:
mv build/libgptneox.so amx_release/libgptneox_amx.so
mv build/quantize-starcoder amx_release/quantize-starcoder_amx
mv build/libstarcoder.so amx_release/libstarcoder_amx.so
# chatglm binary files
mv src/chatglm/build/main amx_release/main-chatglm_amx
# mv src/chatglm/build/_C.cpython-39-x86_64-linux-gnu.so amx_release/chatglm_C.cpython-39-x86_64-linux-gnu.so
- name: Archive amx build files
uses: actions/upload-artifact@v3
with:
@@ -329,7 +274,6 @@ jobs:
shell: bash
run: |
make clean
conda remove -n python39 --all -y
check-windows-avx2-artifact:
if: ${{contains(inputs.platform, 'Windows')}}
@@ -393,10 +337,6 @@ jobs:
needs: check-windows-avx-vnni-artifact
if: needs.check-windows-avx-vnni-artifact.outputs.if-exists == 'false'
steps:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.9"
- name: Set access token
run: |
echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $env:GITHUB_ENV
@@ -438,47 +378,6 @@ jobs:
# mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
- name: Build Chatglm
shell: powershell
run: |
cd src/chatglm
cmake -DAVXVNNI=ON -B build
cmake --build build --config Release -j
- name: Move Chatglm binaries
shell: powershell
run: |
mv src/chatglm/build/Release/main.exe release/main-chatglm_vnni.exe
mv src/chatglm/build/Release/_C.cp39-win_amd64.pyd release/chatglm_C.cp39-win_amd64.pyd
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Build Chatglm Py310
shell: powershell
run: |
cd src/chatglm
rm -r build
cmake -DAVXVNNI=ON -B build
cmake --build build --config Release -j
- name: Move Chatglm binaries Py310
shell: powershell
run: |
mv src/chatglm/build/Release/_C.cp310-win_amd64.pyd release/chatglm_C.cp310-win_amd64.pyd
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Build Chatglm Py311
shell: powershell
run: |
cd src/chatglm
rm -r build
cmake -DAVXVNNI=ON -B build
cmake --build build --config Release -j
- name: Move Chatglm binaries Py311
shell: powershell
run: |
mv src/chatglm/build/Release/_C.cp311-win_amd64.pyd release/chatglm_C.cp311-win_amd64.pyd
- name: Archive build files
uses: actions/upload-artifact@v3
with:
@@ -31,7 +31,7 @@ You may also convert Hugging Face *Transformers* models into native INT4 format,
```eval_rst
.. note::
* Currently only llama/bloom/gptneox/starcoder/chatglm model families are supported; for other models, you may use the Hugging Face ``transformers`` INT4 format as described `above <./langchain_api.html#using-hugging-face-transformers-int4-format>`_.
* Currently only llama/bloom/gptneox/starcoder model families are supported; for other models, you may use the Hugging Face ``transformers`` INT4 format as described `above <./langchain_api.html#using-hugging-face-transformers-int4-format>`_.
* You may choose the corresponding API developed for specific native models to load the converted model.
```
@@ -41,9 +41,9 @@ from ipex_llm.langchain.llms import LlamaLLM
from ipex_llm.langchain.embeddings import LlamaEmbeddings
from langchain.chains.question_answering import load_qa_chain

# switch to ChatGLMEmbeddings/GptneoxEmbeddings/BloomEmbeddings/StarcoderEmbeddings to load other models
# switch to GptneoxEmbeddings/BloomEmbeddings/StarcoderEmbeddings to load other models
embeddings = LlamaEmbeddings(model_path='/path/to/converted/model.bin')
# switch to ChatGLMLLM/GptneoxLLM/BloomLLM/StarcoderLLM to load other models
# switch to GptneoxLLM/BloomLLM/StarcoderLLM to load other models
ipex_llm = LlamaLLM(model_path='/path/to/converted/model.bin')

doc_chain = load_qa_chain(ipex_llm, ...)
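For reference, the native INT4 LangChain flow documented above now covers only the llama/bloom/gptneox/starcoder families. A minimal end-to-end sketch of that flow follows; the model path, the `chain_type` argument, the sample document, and the question are illustrative placeholders rather than values from this diff.

```python
from ipex_llm.langchain.llms import LlamaLLM
from ipex_llm.langchain.embeddings import LlamaEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.schema import Document

# Path to a model already converted to the native INT4 (ggml) format.
model_path = '/path/to/converted/model.bin'

# Switch to the Gptneox*/Bloom*/Starcoder* classes for the other supported families.
embeddings = LlamaEmbeddings(model_path=model_path)
ipex_llm = LlamaLLM(model_path=model_path)

# Minimal question-answering chain over a single in-memory document.
doc_chain = load_qa_chain(ipex_llm, chain_type="stuff")
docs = [Document(page_content="IPEX-LLM accelerates local LLM inference on Intel hardware.")]
print(doc_chain.run(input_documents=docs, question="What does IPEX-LLM do?"))
```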
14 changes: 1 addition & 13 deletions docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst
@@ -31,7 +31,7 @@ IPEX-LLM provides ``TransformersLLM`` and ``TransformersPipelineLLM``, which imp
Native Model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For ``llama``/``chatglm``/``bloom``/``gptneox``/``starcoder`` model families, you could also use the following LLM wrappers with the native (cpp) implementation for maximum performance.
For ``llama``/``bloom``/``gptneox``/``starcoder`` model families, you could also use the following LLM wrappers with the native (cpp) implementation for maximum performance.

.. tabs::

@@ -47,18 +47,6 @@ For ``llama``/``chatglm``/``bloom``/``gptneox``/``starcoder`` model families, yo
.. automethod:: stream
.. automethod:: get_num_tokens

.. tab:: ChatGLM

.. autoclass:: ipex_llm.langchain.llms.ChatGLMLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model, ggml_module, client, model_path, kwargs

.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens

.. tab:: Bloom

.. autoclass:: ipex_llm.langchain.llms.BloomLLM
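The wrappers referenced in this file expose the standard LangChain LLM interface in addition to the `stream` and `get_num_tokens` methods listed above. A brief usage sketch, assuming the usual LangChain call pattern; the model path and prompt are placeholders.

```python
from ipex_llm.langchain.llms import LlamaLLM  # or GptneoxLLM / BloomLLM / StarcoderLLM

llm = LlamaLLM(model_path='/path/to/converted/model.bin')  # placeholder path

prompt = "Q: What is a CPU? A:"
print("prompt tokens:", llm.get_num_tokens(prompt))  # token count helper listed in the docs above
print(llm(prompt))  # plain completion through the standard LangChain __call__ interface

# A `stream` method is also documented for incremental generation; its chunk
# format is not shown in this diff, so it is omitted from this sketch.
```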
9 changes: 4 additions & 5 deletions python/llm/example/CPU/Native-Models/native_int4_pipeline.py
@@ -36,8 +36,7 @@ def load(model_path, model_family, n_threads):
"llama": LlamaForCausalLM,
"gptneox": GptneoxForCausalLM,
"bloom": BloomForCausalLM,
"starcoder": StarcoderForCausalLM,
"chatglm": ChatGLMForCausalLM
"starcoder": StarcoderForCausalLM
}

if model_family in model_family_to_class:
@@ -55,7 +54,7 @@ def load(model_path, model_family, n_threads):

def inference(llm, repo_id_or_model_path, model_family, prompt):

if model_family in ['llama', 'gptneox', 'bloom', 'starcoder', 'chatglm']:
if model_family in ['llama', 'gptneox', 'bloom', 'starcoder']:
# ------ Option 1: Use IPEX-LLM based tokenizer
print('-'*20, ' IPEX-LLM based tokenizer ', '-'*20)
st = time.time()
@@ -109,9 +108,9 @@ def main():
parser.add_argument('--thread-num', type=int, default=2, required=True,
help='Number of threads to use for inference')
parser.add_argument('--model-family', type=str, default='llama', required=True,
choices=["llama", "llama2", "bloom", "gptneox", "starcoder", "chatglm"],
choices=["llama", "llama2", "bloom", "gptneox", "starcoder"],
help="The model family of the large language model (supported option: 'llama', 'llama2', "
"'gptneox', 'bloom', 'starcoder', 'chatglm')")
"'gptneox', 'bloom', 'starcoder')")
parser.add_argument('--repo-id-or-model-path', type=str, required=True,
help='The path to the huggingface checkpoint folder')
parser.add_argument('--prompt', type=str, default='Once upon a time, there existed a little girl who liked to have adventures. ',
15 changes: 2 additions & 13 deletions python/llm/setup.py
@@ -86,12 +86,7 @@
"quantize-llama_vnni.exe",
"quantize-gptneox_vnni.exe",
"quantize-bloom_vnni.exe",
"quantize-starcoder_vnni.exe",

"main-chatglm_vnni.exe",
"chatglm_C.cp39-win_amd64.pyd",
"chatglm_C.cp310-win_amd64.pyd",
"chatglm_C.cp311-win_amd64.pyd"
"quantize-starcoder_vnni.exe"
]
linux_binarys = [
"libllama_avx.so",
@@ -125,13 +120,7 @@
"main-llama",
"main-gptneox",
"main-bloom",
"main-starcoder",

"main-chatglm_vnni",
"main-chatglm_amx",
"chatglm_C.cpython-39-x86_64-linux-gnu.so",
"chatglm_C.cpython-310-x86_64-linux-gnu.so",
"chatglm_C.cpython-311-x86_64-linux-gnu.so"
"main-starcoder"
]

ext_lib_urls = [
4 changes: 0 additions & 4 deletions python/llm/src/ipex_llm/ggml/convert.py
@@ -76,10 +76,6 @@ def _convert_starcoder(model_path, outfile_dir, outtype):
_convert_starcoder_hf_to_ggml(model_path, outfile_dir, outtype)


def _convert_chatglm(model_path, outfile_dir, outtype):
return _convert_chatglm_hf_to_ggml(model_path, outfile_dir, outtype)


def _convert_to_ggml(model_path: str, outfile_dir: str,
model_family: str = 'llama', outtype: str="fp16"):
"""
12 changes: 3 additions & 9 deletions python/llm/src/ipex_llm/ggml/convert_model.py
@@ -16,7 +16,7 @@
import os
import time
from pathlib import Path
from ipex_llm.ggml.convert import _convert_to_ggml, _convert_chatglm
from ipex_llm.ggml.convert import _convert_to_ggml
from ipex_llm.ggml.quantize import quantize
from ipex_llm.utils.common import invalidInputError
import argparse
@@ -54,9 +54,9 @@ def convert_model(input_path: str,
# make sure directory exists
os.makedirs(output_path, exist_ok=True)
# check input value
invalidInputError(model_family in ['llama', 'bloom', 'gptneox', 'starcoder', 'chatglm'],
invalidInputError(model_family in ['llama', 'bloom', 'gptneox', 'starcoder'],
"Now we only support quantization of model \
family('llama', 'bloom', 'gptneox', 'starcoder', 'chatglm')",
family('llama', 'bloom', 'gptneox', 'starcoder')",
"{} is not in the list.".format(model_family))
invalidInputError(os.path.isdir(output_path),
"The output_path {} was not a directory".format(output_path))
@@ -78,12 +78,6 @@
family('llama', 'gptneox', 'starcoder')",
"{} is not in the list.".format(model_family))

# chatglm merges convertion and quantization into one operation.
if model_family == 'chatglm':
return _convert_chatglm(model_path=input_path,
outfile_dir=output_path,
outtype=dtype)

if tmp_path is not None:
model_name = Path(input_path).stem
tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
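With the chatglm branch removed, `convert_model` accepts only the four model families checked above. A hedged sketch of a call; the paths are placeholders and the `dtype='int4'` value is an assumption about the quantization target, not something shown in this diff.

```python
from ipex_llm.ggml.convert_model import convert_model

# Convert a Hugging Face checkpoint of a supported family to the native ggml format.
convert_model(
    input_path='/path/to/llama/hf-checkpoint',  # placeholder checkpoint folder
    output_path='/path/to/output/dir',          # created if missing (os.makedirs above)
    model_family='llama',                       # 'llama', 'bloom', 'gptneox', or 'starcoder'
    dtype='int4',                               # assumed quantization dtype
)
```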
22 changes: 0 additions & 22 deletions python/llm/src/ipex_llm/ggml/model/chatglm/__init__.py

This file was deleted.
