From d86477f14db2a8bdfc022dffb6ae4efec2918e56 Mon Sep 17 00:00:00 2001
From: hxsz1997 <45651968+hxsz1997@users.noreply.github.com>
Date: Wed, 27 Mar 2024 17:48:16 +0800
Subject: [PATCH] Remove native_int4 in LangChain examples (#10510)

* rebase the modifications onto ipex-llm

* fix the typo
---
 python/llm/example/CPU/LangChain/README.md    |   5 -
 .../CPU/LangChain/README_nativeint4.md        | 128 ----------------
 .../LangChain/{transformers_int4 => }/chat.py |   0
 .../{transformers_int4 => }/llm_math.py       |   0
 .../CPU/LangChain/native_int4/docqa.py        | 110 --------------
 .../CPU/LangChain/native_int4/streamchat.py   |  82 ----------
 .../LangChain/native_int4/voiceassistant.py   | 141 ------------------
 .../LangChain/{transformers_int4 => }/rag.py  |   0
 .../{transformers_int4 => }/voiceassistant.py |   0
 .../{transformer_int4_gpu => }/README.md      |   2 +-
 .../{transformer_int4_gpu => }/chat.py        |   0
 .../{transformer_int4_gpu => }/rag.py         |   0
 12 files changed, 1 insertion(+), 467 deletions(-)
 delete mode 100644 python/llm/example/CPU/LangChain/README_nativeint4.md
 rename python/llm/example/CPU/LangChain/{transformers_int4 => }/chat.py (100%)
 rename python/llm/example/CPU/LangChain/{transformers_int4 => }/llm_math.py (100%)
 delete mode 100644 python/llm/example/CPU/LangChain/native_int4/docqa.py
 delete mode 100644 python/llm/example/CPU/LangChain/native_int4/streamchat.py
 delete mode 100644 python/llm/example/CPU/LangChain/native_int4/voiceassistant.py
 rename python/llm/example/CPU/LangChain/{transformers_int4 => }/rag.py (100%)
 rename python/llm/example/CPU/LangChain/{transformers_int4 => }/voiceassistant.py (100%)
 rename python/llm/example/GPU/LangChain/{transformer_int4_gpu => }/README.md (98%)
 rename python/llm/example/GPU/LangChain/{transformer_int4_gpu => }/chat.py (100%)
 rename python/llm/example/GPU/LangChain/{transformer_int4_gpu => }/rag.py (100%)

diff --git a/python/llm/example/CPU/LangChain/README.md b/python/llm/example/CPU/LangChain/README.md
index 3382590a052..104440e4073 100644
--- a/python/llm/example/CPU/LangChain/README.md
+++ b/python/llm/example/CPU/LangChain/README.md
@@ -66,8 +66,3 @@ python transformers_int4/voiceassistant.py -m [-q
 This example is adapted from https://python.langchain.com/docs/use_cases/chatbots/voice_assistant with only tiny code changes.
-
-Some extra dependencies are required to be installed for this example.
-```bash
-pip install SpeechRecognition
-pip install pyttsx3
-pip install PyAudio
-pip install openai-whisper
-pip install soundfile
-```
-
-```bash
-python native_int4/voiceassistant.py -x MODEL_FAMILY -m CONVERTED_MODEL_PATH -t THREAD_NUM -c CONTEXT_SIZE
-```
-
-arguments info:
-- `-m CONVERTED_MODEL_PATH`: **required**, path to the converted model
-- `-x MODEL_FAMILY`: **required**, the model family of the model specified in `-m`, available options are `llama`, `gptneox` and `bloom`
-- `-t THREAD_NUM`: specify the number of threads to use for inference. Default is `2`.
-- `-c CONTEXT_SIZE`: specify the maximum context size. Defaults to `512`.
-
-When you see the following output:
-> listening now...
-
-Please say something through your microphone (e.g., "What is AI?"). The program will automatically detect when you have finished speaking and recognize it.
-
-#### Known Issues
-The speech_recognition library may occasionally skip recording due to low volume. An alternative option is to save the recording in WAV format using `PyAudio` and read the file as an input.
-Here is an example using PyAudio:
-```python
-import wave
-
-import numpy as np
-import pyaudio
-import speech_recognition as sr
-
-CHUNK = 1024
-FORMAT = pyaudio.paInt16
-CHANNELS = 1         # the desired number of input channels
-RATE = 16000         # the desired sampling rate (in Hz)
-RECORD_SECONDS = 10  # recording time (in seconds)
-WAVE_OUTPUT_FILENAME = "/path/to/pyaudio_out.wav"
-p = pyaudio.PyAudio()
-
-stream = p.open(format=FORMAT,
-                channels=CHANNELS,
-                rate=RATE,
-                input=True,
-                frames_per_buffer=CHUNK)
-
-print("*" * 10, "Listening\n")
-frames = []
-for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
-    # pass exception_on_overflow=False to stream.read if input overflow errors occur
-    data = stream.read(CHUNK)
-    frames.append(data)
-print("*" * 10, "Stop recording\n")
-
-stream.stop_stream()
-stream.close()
-p.terminate()
-
-# save the recording as a WAV file
-wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
-wf.setnchannels(CHANNELS)
-wf.setsampwidth(p.get_sample_size(FORMAT))
-wf.setframerate(RATE)
-wf.writeframes(b''.join(frames))
-wf.close()
-
-# read the WAV file back and normalize the samples to float32 in [-1, 1]
-r = sr.Recognizer()
-with sr.AudioFile(WAVE_OUTPUT_FILENAME) as source1:
-    audio = r.record(source1)  # read the entire audio file
-frame_data = np.frombuffer(audio.frame_data, np.int16).flatten().astype(np.float32) / 32768.0
-```
-
-### 4. Math
-
-This is an example using `LLMMathChain`. It has been validated with [phoenix-7b](https://huggingface.co/FreedomIntelligence/phoenix-inst-chat-7b).
-
-```bash
-python transformers_int4/llm_math.py -m MODEL_PATH [-q QUESTION]
-```
-arguments info:
-- `-m MODEL_PATH`: **required**, path to the transformers model
-- `-q QUESTION`: question to ask. Default is `What is 13 raised to the .3432 power?`.
-
diff --git a/python/llm/example/CPU/LangChain/transformers_int4/chat.py b/python/llm/example/CPU/LangChain/chat.py
similarity index 100%
rename from python/llm/example/CPU/LangChain/transformers_int4/chat.py
rename to python/llm/example/CPU/LangChain/chat.py
diff --git a/python/llm/example/CPU/LangChain/transformers_int4/llm_math.py b/python/llm/example/CPU/LangChain/llm_math.py
similarity index 100%
rename from python/llm/example/CPU/LangChain/transformers_int4/llm_math.py
rename to python/llm/example/CPU/LangChain/llm_math.py
diff --git a/python/llm/example/CPU/LangChain/native_int4/docqa.py b/python/llm/example/CPU/LangChain/native_int4/docqa.py
deleted file mode 100644
index ce7cf8f111f..00000000000
--- a/python/llm/example/CPU/LangChain/native_int4/docqa.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#
-# Copyright 2016 The BigDL Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# This would make sure Python is aware there is more than one sub-package within bigdl,
-# physically located elsewhere.
-# Otherwise there would be a module-not-found error in a non-pip setting, as Python would
-# only search the first bigdl package and end up finding only one sub-package.
-
-# Code is adapted from https://python.langchain.com/docs/modules/chains/additional/question_answering.html
-
-import argparse
-
-from langchain.vectorstores import Chroma
-from langchain.chains.chat_vector_db.prompts import (CONDENSE_QUESTION_PROMPT,
-                                                     QA_PROMPT)
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.chains.question_answering import load_qa_chain
-from langchain.callbacks.manager import CallbackManager
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-
-from ipex_llm.langchain.llms import *
-from ipex_llm.langchain.embeddings import *
-
-
-def main(args):
-
-    input_path = args.input_path
-    model_path = args.model_path
-    model_family = args.model_family
-    query = args.question
-    n_ctx = args.n_ctx
-    n_threads = args.thread_num
-
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-
-    # split texts of the input doc
-    with open(input_path) as f:
-        input_doc = f.read()
-    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-    texts = text_splitter.split_text(input_doc)
-
-    model_family_to_embeddings = {
-        "llama": LlamaEmbeddings,
-        "gptneox": GptneoxEmbeddings,
-        "bloom": BloomEmbeddings,
-        "starcoder": StarcoderEmbeddings
-    }
-
-    model_family_to_llm = {
-        "llama": LlamaLLM,
-        "gptneox": GptneoxLLM,
-        "bloom": BloomLLM,
-        "starcoder": StarcoderLLM
-    }
-
-    if model_family in model_family_to_embeddings and model_family in model_family_to_llm:
-        llm_embeddings = model_family_to_embeddings[model_family]
-        langchain_llm = model_family_to_llm[model_family]
-    else:
-        raise ValueError(f"Unknown model family: {model_family}")
-
-    # create embeddings and store them in a vectordb
-    embeddings = llm_embeddings(model_path=model_path, n_threads=n_threads, n_ctx=n_ctx)
-    docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
-
-    # get relevant texts
-    docs = docsearch.get_relevant_documents(query)
-
-    bigdl_llm = langchain_llm(
-        model_path=model_path, n_ctx=n_ctx, n_threads=n_threads, callback_manager=callback_manager
-    )
-
-    doc_chain = load_qa_chain(
-        bigdl_llm, chain_type="stuff", prompt=QA_PROMPT, callback_manager=callback_manager
-    )
-
-    doc_chain.run(input_documents=docs, question=query)
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='BigDLCausalLM LangChain QA over Docs Example')
-    parser.add_argument('-x', '--model-family', type=str, required=True,
-                        choices=["llama", "bloom", "gptneox", "starcoder"],
-                        help='the model family')
-    parser.add_argument('-m', '--model-path', type=str, required=True,
-                        help='the path to the converted llm model')
-    parser.add_argument('-i', '--input-path', type=str, required=True,
-                        help='the path to the input doc')
-    parser.add_argument('-q', '--question', type=str, default='What is AI?',
-                        help='question you want to ask')
-    parser.add_argument('-c', '--n-ctx', type=int, default=2048,
-                        help='the maximum context size')
-    parser.add_argument('-t', '--thread-num', type=int, default=2,
-                        help='number of threads to use for inference')
-    args = parser.parse_args()
-
-    main(args)
diff --git a/python/llm/example/CPU/LangChain/native_int4/streamchat.py b/python/llm/example/CPU/LangChain/native_int4/streamchat.py
deleted file mode 100644
index a0127dd0876..00000000000
--- a/python/llm/example/CPU/LangChain/native_int4/streamchat.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#
-# Copyright 2016 The BigDL Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# This would make sure Python is aware there is more than one sub-package within bigdl,
-# physically located elsewhere.
-# Otherwise there would be a module-not-found error in a non-pip setting, as Python would
-# only search the first bigdl package and end up finding only one sub-package.
-
-import argparse
-
-from ipex_llm.langchain.llms import *
-from langchain import PromptTemplate, LLMChain
-from langchain.callbacks.manager import CallbackManager
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-
-
-def main(args):
-
-    question = args.question
-    model_path = args.model_path
-    model_family = args.model_family
-    n_threads = args.thread_num
-    template = """{question}"""
-
-    prompt = PromptTemplate(template=template, input_variables=["question"])
-
-    # Callbacks support token-wise streaming
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-
-    model_family_to_llm = {
-        "llama": LlamaLLM,
-        "gptneox": GptneoxLLM,
-        "bloom": BloomLLM,
-        "starcoder": StarcoderLLM,
-        "chatglm": ChatGLMLLM
-    }
-
-    if model_family in model_family_to_llm:
-        langchain_llm = model_family_to_llm[model_family]
-    else:
-        raise ValueError(f"Unknown model family: {model_family}")
-
-    # verbose=True is required for streaming through the callback manager
-    llm = langchain_llm(
-        model_path=model_path,
-        n_threads=n_threads,
-        callback_manager=callback_manager,
-        verbose=True
-    )
-
-    llm_chain = LLMChain(prompt=prompt, llm=llm)
-
-    llm_chain.run(question)
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='BigDLCausalLM LangChain Streaming Chat Example')
-    parser.add_argument('-x', '--model-family', type=str, required=True,
-                        choices=["llama", "bloom", "gptneox", "chatglm", "starcoder"],
-                        help='the model family')
-    parser.add_argument('-m', '--model-path', type=str, required=True,
-                        help='the path to the converted llm model')
-    parser.add_argument('-q', '--question', type=str, default='What is AI?',
-                        help='question you want to ask')
-    parser.add_argument('-t', '--thread-num', type=int, default=2,
-                        help='number of threads to use for inference')
-    args = parser.parse_args()
-
-    main(args)
diff --git a/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py b/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py
deleted file mode 100644
index c41666ca1d4..00000000000
--- a/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py
+++ /dev/null
@@ -1,141 +0,0 @@
-#
-# Copyright 2016 The BigDL Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# This would make sure Python is aware there is more than one sub-package within bigdl,
-# physically located elsewhere.
-# Otherwise there would be a module-not-found error in a non-pip setting, as Python would
-# only search the first bigdl package and end up finding only one sub-package.
-
-# Code adapted from https://python.langchain.com/docs/use_cases/chatbots/voice_assistant
-
-
-from langchain import LLMChain, PromptTemplate
-from ipex_llm.langchain.llms import *
-from langchain.memory import ConversationBufferWindowMemory
-from langchain.callbacks.manager import CallbackManager
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-import speech_recognition as sr
-import pyttsx3
-import argparse
-
-
-def prepare_chain(args):
-
-    model_path = args.model_path
-    model_family = args.model_family
-    n_threads = args.thread_num
-    n_ctx = args.context_size
-
-    # A simple prompt can bring good-enough results.
-    # You could tune the prompt based on your own model to perform better.
-    template = """
-    {history}
-
-    Q: {human_input}
-    A:"""
-    prompt = PromptTemplate(input_variables=["history", "human_input"], template=template)
-
-    # We use our BigDLCausalLM as a substitute for the OpenAI LLM, which requires a web API
-    model_family_to_llm = {
-        "llama": LlamaLLM,
-        "gptneox": GptneoxLLM,
-        "bloom": BloomLLM,
-        "starcoder": StarcoderLLM,
-        "chatglm": ChatGLMLLM
-    }
-
-    if model_family in model_family_to_llm:
-        langchain_llm = model_family_to_llm[model_family]
-    else:
-        raise ValueError(f"Unknown model family: {model_family}")
-
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-    llm = langchain_llm(
-        model_path=model_path,
-        n_threads=n_threads,
-        callback_manager=callback_manager,
-        verbose=True,
-        n_ctx=n_ctx,
-        stop=['\n\n']  # You could tune the stop words based on your own model to perform better
-    )
-
-    # The following code is exactly the same as the original use case
-    voiceassistant_chain = LLMChain(
-        llm=llm,
-        prompt=prompt,
-        verbose=True,
-        memory=ConversationBufferWindowMemory(k=2),
-    )
-
-    return voiceassistant_chain
-
-
-def listen(voiceassistant_chain):
-    engine = pyttsx3.init()
-    r = sr.Recognizer()
-    with sr.Microphone() as source:
-        print("Calibrating...")
-        r.adjust_for_ambient_noise(source, duration=5)
-        # optional parameters to adjust microphone sensitivity
-        # r.energy_threshold = 200
-        # r.pause_threshold = 0.5
-
-        print("Okay, go!")
-        while True:
-            text = ""
-            print("listening now...")
-            try:
-                audio = r.listen(source, timeout=5, phrase_time_limit=30)
-                print("Recognizing...")
-                # whisper model options are found here: https://github.com/openai/whisper#available-models-and-languages
-                # other speech recognition models are also available.
-                text = r.recognize_whisper(
-                    audio,
-                    model="medium.en",
-                    show_dict=True,
-                )["text"]
-            except Exception as e:
-                unrecognized_speech_text = (
-                    f"Sorry, I didn't catch that. Exception was: {e}"
-                )
-                text = unrecognized_speech_text
-            print(text)
-
-            response_text = voiceassistant_chain.predict(human_input=text)
-            print(response_text)
-            engine.say(response_text)
-            engine.runAndWait()
-
-
-def main(args):
-    chain = prepare_chain(args)
-    listen(chain)
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='BigDLCausalLM LangChain Voice Assistant Example')
-    parser.add_argument('-x', '--model-family', type=str, required=True,
-                        choices=["llama", "bloom", "gptneox", "chatglm", "starcoder"],
-                        help='the model family')
-    parser.add_argument('-m', '--model-path', type=str, required=True,
-                        help='the path to the converted llm model')
-    parser.add_argument('-t', '--thread-num', type=int, default=2,
-                        help='number of threads to use for inference')
-    parser.add_argument('-c', '--context-size', type=int, default=512,
-                        help='maximum context size')
-    args = parser.parse_args()
-
-    main(args)
diff --git a/python/llm/example/CPU/LangChain/transformers_int4/rag.py b/python/llm/example/CPU/LangChain/rag.py
similarity index 100%
rename from python/llm/example/CPU/LangChain/transformers_int4/rag.py
rename to python/llm/example/CPU/LangChain/rag.py
diff --git a/python/llm/example/CPU/LangChain/transformers_int4/voiceassistant.py b/python/llm/example/CPU/LangChain/voiceassistant.py
similarity index 100%
rename from python/llm/example/CPU/LangChain/transformers_int4/voiceassistant.py
rename to python/llm/example/CPU/LangChain/voiceassistant.py
diff --git a/python/llm/example/GPU/LangChain/transformer_int4_gpu/README.md b/python/llm/example/GPU/LangChain/README.md
similarity index 98%
rename from python/llm/example/GPU/LangChain/transformer_int4_gpu/README.md
rename to python/llm/example/GPU/LangChain/README.md
index 9822aae856e..9fd9489e4b1 100644
--- a/python/llm/example/GPU/LangChain/transformer_int4_gpu/README.md
+++ b/python/llm/example/GPU/LangChain/README.md
@@ -92,7 +92,7 @@ arguments info:
 - `-m MODEL_PATH`: **required**, path to the model
 - `-q QUESTION`: question to ask. Default is `What is AI?`.
 
-#### 5.1. RAG (Retrieval Augmented Generation)
+#### 5.2. RAG (Retrieval Augmented Generation)
 
 ```bash
 python rag.py -m [-q QUESTION] [-i INPUT_PATH]
diff --git a/python/llm/example/GPU/LangChain/transformer_int4_gpu/chat.py b/python/llm/example/GPU/LangChain/chat.py
similarity index 100%
rename from python/llm/example/GPU/LangChain/transformer_int4_gpu/chat.py
rename to python/llm/example/GPU/LangChain/chat.py
diff --git a/python/llm/example/GPU/LangChain/transformer_int4_gpu/rag.py b/python/llm/example/GPU/LangChain/rag.py
similarity index 100%
rename from python/llm/example/GPU/LangChain/transformer_int4_gpu/rag.py
rename to python/llm/example/GPU/LangChain/rag.py