From 2fa167a38b324f838739ed9f419fbf875c396d75 Mon Sep 17 00:00:00 2001 From: Vivian Fang Date: Wed, 25 Oct 2023 12:40:38 -0700 Subject: [PATCH 1/6] Revert "Update README.md" This reverts commit c9a11563ea5f1f81edc9622da75e70fcd9312e2d. --- memgpt/local_llm/README.md | 157 ++++++++----------------------------- 1 file changed, 32 insertions(+), 125 deletions(-) diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md index e285a49964..a79c0f9e5f 100644 --- a/memgpt/local_llm/README.md +++ b/memgpt/local_llm/README.md @@ -1,110 +1,19 @@ ⁉️ Need help configuring local LLMs with MemGPT? Ask for help on [our Discord](https://discord.gg/9GEQrxmVyE) or [post on the GitHub discussion](https://github.com/cpacker/MemGPT/discussions/67). -If you have a hosted ChatCompletion-compatible endpoint that works with function calling, you can simply set `OPENAI_API_BASE` (`export OPENAI_API_BASE=...`) to the IP+port of your endpoint. **As of 10/22/2023, most ChatCompletion endpoints do *NOT* support function calls, so if you want to play with MemGPT and open models, you probably need to follow the instructions below.** +👀 If you have a hosted ChatCompletion-compatible endpoint that works with function calling, you can simply set `OPENAI_API_BASE` (`export OPENAI_API_BASE=...`) to the IP+port of your endpoint. **As of 10/22/2023, most ChatCompletion endpoints do *NOT* support function calls, so if you want to play with MemGPT and open models, you probably need to follow the instructions below.** ---- - -# ⚡ Quick overview - -1. Put your own LLM behind a web server API (e.g. [oobabooga web UI](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui)) -2. Set `OPENAI_API_BASE=YOUR_API_IP_ADDRESS` and `BACKEND_TYPE=webui` -3. Run MemGPT with `python3 main.py --no_verify`, it should now use your LLM instead of OpenAI GPT -4. If things aren't working, read the full instructions below - -When using open LLMs with MemGPT, **the main failure case will be your LLM outputting a string that cannot be understood by MemGPT**. MemGPT uses function calling to manage memory (eg `edit_core_memory(...)` and interact with the user (`send_message(...)`), so your LLM needs generate outputs that can be parsed into MemGPT function calls. +🙋 Our examples assume that you're using [oobabooga web UI](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui) to put your LLMs behind a web server. If you need help setting this up, check the instructions [here](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui). More LLM web server support to come soon (tell us what you use and we'll add it)! --- # How to connect MemGPT to non-OpenAI LLMs -
-

🖥️ Serving your LLM from a web server (WebUI example)

- -To get MemGPT to work with a local LLM, you need to have the LLM running on a server that takes API requests. - -For the purposes of this example, we're going to serve (host) the LLMs using [oobabooga web UI](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui), but if you want to use something else you can! This also assumes your running web UI locally - if you're running on e.g. Runpod, you'll want to follow Runpod specific instructions (for example use [TheBloke's one-click UI and API](https://github.com/TheBlokeAI/dockerLLM/blob/main/README_Runpod_LocalLLMsUIandAPI.md)) - -1. Install oobabooga web UI using the instructions [here](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui) -2. Once installed, launch the web server with `python server.py` -3. Navigate to the web app (if local, this is probably [`http://127.0.0.1:7860`](http://localhost:7860)), select the model you want to use, adjust your GPU and CPU memory settings, and click "load" -4. If the model was loaded successfully, you should be able to access it via the API (if local, this is probably on port `5000`) -5. Assuming steps 1-4 went correctly, the LLM is now properly hosted on a port you can point MemGPT to! - -WebUI exposes a lot of parameters that can dramatically change LLM outputs, to change these you can modify the [WebUI settings file](/memgpt/local_llm/webui/settings.py). - -⁉️ If you have problems getting WebUI setup, please use the [official web UI repo for support](https://github.com/oobabooga/text-generation-webui)! There will be more answered questions about web UI there vs here on the MemGPT repo. - -
- -
-

🦙 Running MemGPT with your own LLM

- -Once you have an LLM web server set up, all you need to do to connect it to MemGPT is set two environment variables: - -- `OPENAI_API_BASE` - - set this to the IP address of your LLM API - for example, if you're using web UI on a local machine, this will look like `http://127.0.0.1:5000` -- `BACKEND_TYPE` - - set this to `webui` - - this controls how MemGPT packages the HTTP request to the webserver, see [this code](https://github.com/cpacker/MemGPT/blob/main/memgpt/local_llm/webui/api.py) - - currently this is set up to work with web UI, but it might work with other backends / web servers too! - - if you'd like to use a different web server and you need a different style of HTTP request, let us know on the discussion page (https://github.com/cpacker/MemGPT/discussions/67) and we'll try to add it ASAP - -You can change the prompt format and output parser used with the `--model` flag. For example: - -```sh -# this will cause MemGPT to use the airoboros-l2-70b-2.1 parsers, regardless of what model you're hosting on your web server -# you can mix and match parsers + models! -$ python3 main.py --model airoboros-l2-70b-2.1 -``` - -### Example with airoboros 70b - -```sh -# assuming we're running a model (eg airoboros) behind a textgen webui server -export OPENAI_API_BASE=127.0.0.1:5000 # change this to your actual API address -export BACKEND_TYPE=webui # if you don't set this, MemGPT will throw an error - -# using --no_verify can be helpful if the LLM you're using doesn't output inner monologue properly -$ python3 main.py --no_verify - -Running... [exit by typing '/exit'] -💭 Bootup sequence complete. Persona activated. Testing messaging functionality. - -💭 None -🤖 Welcome! My name is Sam. How can I assist you today? -Enter your message: My name is Brad, not Chad... - -💭 None -⚡🧠 [function] updating memory with core_memory_replace: - First name: Chad - → First name: Brad -``` - -
- -
-

🙋 Adding support for new LLMs + improving performance

- -⁉️ When using open LLMs with MemGPT, **the main failure case will be your LLM outputting a string that cannot be understood by MemGPT**. MemGPT uses function calling to manage memory (eg `edit_core_memory(...)` and interact with the user (`send_message`), so your LLM needs generate outputs that can be parsed into MemGPT function calls. - -### What is a "wrapper"? - -To support function calling with open LLMs for MemGPT, we utilize "wrapper" code that: - -1. turns `system` (the MemGPT instructions), `messages` (the MemGPT conversation window), and `functions` (the MemGPT function set) parameters from ChatCompletion into a single unified prompt string for your LLM -2. turns the output string generated by your LLM back into a MemGPT function call - -Different LLMs are trained using different prompt formats (eg `#USER:` vs `user` vs ...), and LLMs that are trained on function calling are often trained using different function call formats, so if you're getting poor performance, try experimenting with different prompt formats! We recommend starting with the prompt format (and function calling format) recommended in the HuggingFace model card, and experimenting from there. - -We currently only support a few prompt formats in this repo ([located here](https://github.com/cpacker/MemGPT/tree/main/memgpt/local_llm/llm_chat_completion_wrappers))! If you write a new parser, please open a PR and we'll merge it in. - -
-

Adding a new wrapper (change the prompt format + function parser)

- -To make a new wrapper (for example, because you want to try a different prompt format), you just need to subclass `LLMChatCompletionWrapper`. Your new wrapper class needs to implement two functions: - -- One to go from ChatCompletion messages/functions schema to a prompt string -- And one to go from raw LLM outputs to a ChatCompletion response +**If you have an LLM that is function-call finetuned**: + - Implement a wrapper class for that model + - The wrapper class needs to implement two functions: + - One to go from ChatCompletion messages/functions schema to a prompt string + - And one to go from raw LLM outputs to a ChatCompletion response + - Put that model behind a server (e.g. using WebUI) and set `OPENAI_API_BASE` ```python class LLMChatCompletionWrapper(ABC): @@ -120,13 +29,6 @@ class LLMChatCompletionWrapper(ABC): pass ``` -You can follow our example wrappers ([located here](https://github.com/cpacker/MemGPT/tree/main/memgpt/local_llm/llm_chat_completion_wrappers)). - -
- -
-

Example wrapper for Airoboros

- ## Example with [Airoboros](https://huggingface.co/jondurbin/airoboros-l2-70b-2.1) (llama2 finetune) To help you get started, we've implemented an example wrapper class for a popular llama2 model **finetuned on function calling** (Airoboros). We want MemGPT to run well on open models as much as you do, so we'll be actively updating this page with more examples. Additionally, we welcome contributions from the community! If you find an open LLM that works well with MemGPT, please open a PR with a model wrapper and we'll merge it ASAP. @@ -156,19 +58,35 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper): } """ ``` +See full file [here](llm_chat_completion_wrappers/airoboros.py). WebUI exposes a lot of parameters that can dramatically change LLM outputs, to change these you can modify the [WebUI settings file](/memgpt/local_llm/webui/settings.py). -See full file [here](llm_chat_completion_wrappers/airoboros.py). +### Running the example -
+```sh +# running airoboros behind a textgen webui server +export OPENAI_API_BASE = +export BACKEND_TYPE = webui -
+# using --no_verify because this airoboros example does not output inner monologue, just functions +# airoboros is able to properly call `send_message` +$ python3 main.py --no_verify ---- +Running... [exit by typing '/exit'] +💭 Bootup sequence complete. Persona activated. Testing messaging functionality. + +💭 None +🤖 Welcome! My name is Sam. How can I assist you today? +Enter your message: My name is Brad, not Chad... + +💭 None +⚡🧠 [function] updating memory with core_memory_replace: + First name: Chad + → First name: Brad +``` -## FAQ +--- -
-

Status of ChatCompletion w/ function calling and open LLMs

+## Status of ChatCompletion w/ function calling and open LLMs MemGPT uses function calling to do memory management. With [OpenAI's ChatCompletion API](https://platform.openai.com/docs/api-reference/chat/), you can pass in a function schema in the `functions` keyword arg, and the API response will include a `function_call` field that includes the function name and the function arguments (generated JSON). How this works under the hood is your `functions` keyword is combined with the `messages` and `system` to form one big string input to the transformer, and the output of the transformer is parsed to extract the JSON function call. @@ -178,19 +96,8 @@ In the future, more open LLMs and LLM servers (that can host OpenAI-compatable C 2. Partly because of how complex it is to support function calling, most (all?) of the community projects that do OpenAI ChatCompletion endpoints for arbitrary open LLMs do not support function calling, because if they did, they would need to write model-specific parsing code for each one. -
+## What is this all this extra code for? -
-

What is all this extra code for?

-
 Because of the poor state of function calling support in existing ChatCompletion API serving code, we instead provide a light wrapper on top of ChatCompletion that adds parsers to handle function calling support. These parsers need to be specific to the model you're using (or at least specific to the way it was trained on function calling). We hope that our example code will help the community add additional compatibility of MemGPT with more function-calling LLMs - we will also add more model support as we test more models and find those that work well enough to run MemGPT's function set.

 To run the example of MemGPT with Airoboros, you'll need to host the model behind some LLM web server (for example [webui](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui)). Then, all you need to do is point MemGPT to this API endpoint by setting the environment variables `OPENAI_API_BASE` and `BACKEND_TYPE`. Now, instead of calling ChatCompletion on OpenAI's API, MemGPT will use its own ChatCompletion wrapper that parses the system, messages, and function arguments into a format that Airoboros has been finetuned on, and once Airoboros generates a string output, MemGPT will parse the response to extract a potential function call (knowing what we know about Airoboros' expected function call output).
-
-
- -
-

Need more help?

- - Ask for help on [our Discord](https://discord.gg/9GEQrxmVyE) or [post on the GitHub discussion](https://github.com/cpacker/MemGPT/discussions/67). -
From 189cd99855019d5810628c3a3e4c18e1ab78d79a Mon Sep 17 00:00:00 2001 From: Vivian Fang Date: Wed, 25 Oct 2023 12:40:45 -0700 Subject: [PATCH 2/6] Revert "cleanup" This reverts commit b0eb123e9ccdd696ccc943888a2f1730a62939fb. --- memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py b/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py index 0ce5d4b12e..d1ae5f22e6 100644 --- a/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py +++ b/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py @@ -188,6 +188,7 @@ def create_function_call(function_call): if self.include_opening_brance_in_prefix: prompt += "\n{" + print(prompt) return prompt def clean_function_args(self, function_name, function_args): From e1927808501c508b8ef15cc33cb8177986ae3c38 Mon Sep 17 00:00:00 2001 From: Vivian Fang Date: Wed, 25 Oct 2023 12:40:54 -0700 Subject: [PATCH 3/6] Revert "Merge pull request #117 from cpacker/cleanup" This reverts commit 487e13c634fbf4866a53d5f99c01de85f661e8b9, reversing changes made to 516c24c73ba7b25bc55e396675441e5bf95f3569. --- memgpt/local_llm/chat_completion_proxy.py | 15 +- .../llm_chat_completion_wrappers/airoboros.py | 213 --------------- .../llm_chat_completion_wrappers/dolphin.py | 243 ------------------ memgpt/local_llm/webui/settings.py | 1 - 4 files changed, 7 insertions(+), 465 deletions(-) delete mode 100644 memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py index 41442781ec..de9da221fb 100644 --- a/memgpt/local_llm/chat_completion_proxy.py +++ b/memgpt/local_llm/chat_completion_proxy.py @@ -5,13 +5,12 @@ import json from .webui.api import get_webui_completion -from .llm_chat_completion_wrappers import airoboros, dolphin +from .llm_chat_completion_wrappers import airoboros from .utils import DotDict HOST = os.getenv("OPENAI_API_BASE") HOST_TYPE = os.getenv("BACKEND_TYPE") # default None == ChatCompletion DEBUG = False -DEFAULT_WRAPPER = airoboros.Airoboros21InnerMonologueWrapper() async def get_chat_completion( @@ -23,14 +22,14 @@ async def get_chat_completion( if function_call != "auto": raise ValueError(f"function_call == {function_call} not supported (auto only)") - if model == "airoboros-l2-70b-2.1": - llm_wrapper = airoboros.Airoboros21InnerMonologueWrapper() - elif model == "dolphin-2.1-mistral-7b": - llm_wrapper = dolphin.Dolphin21MistralWrapper() + if model == "airoboros_v2.1": + llm_wrapper = airoboros.Airoboros21Wrapper() else: # Warn the user that we're using the fallback - print(f"Warning: no wrapper specified for local LLM, using the default wrapper") - llm_wrapper = DEFAULT_WRAPPER + print( + f"Warning: could not find an LLM wrapper for {model}, using the airoboros wrapper" + ) + llm_wrapper = airoboros.Airoboros21Wrapper() # First step: turn the message sequence into a prompt that the model expects prompt = llm_wrapper.chat_completion_to_prompt(messages, functions) diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py index 60f8ee6b80..98d3625e2f 100644 --- a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py +++ b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py @@ -150,7 +150,6 @@ def create_function_call(function_call): if self.include_opening_brance_in_prefix: prompt += "\n{" - print(prompt) return prompt def clean_function_args(self, 
function_name, function_args): @@ -203,215 +202,3 @@ def output_to_chat_completion_response(self, raw_llm_output): }, } return message - - -class Airoboros21InnerMonologueWrapper(Airoboros21Wrapper): - """Still expect only JSON outputs from model, but add inner monologue as a field""" - - def __init__( - self, - simplify_json_content=True, - clean_function_args=True, - include_assistant_prefix=True, - include_opening_brace_in_prefix=True, - include_section_separators=True, - ): - self.simplify_json_content = simplify_json_content - self.clean_func_args = clean_function_args - self.include_assistant_prefix = include_assistant_prefix - self.include_opening_brance_in_prefix = include_opening_brace_in_prefix - self.include_section_separators = include_section_separators - - def chat_completion_to_prompt(self, messages, functions): - """Example for airoboros: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#prompt-format - - A chat. - USER: {prompt} - ASSISTANT: - - Functions support: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#agentfunction-calling - - As an AI assistant, please select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format. - - Input: I want to know how many times 'Python' is mentioned in my text file. - - Available functions: - file_analytics: - description: This tool performs various operations on a text file. - params: - action: The operation we want to perform on the data, such as "count_occurrences", "find_line", etc. - filters: - keyword: The word or phrase we want to search for. - - OpenAI functions schema style: - - { - "name": "send_message", - "description": "Sends a message to the human user", - "parameters": { - "type": "object", - "properties": { - # https://json-schema.org/understanding-json-schema/reference/array.html - "message": { - "type": "string", - "description": "Message contents. All unicode (including emojis) are supported.", - }, - }, - "required": ["message"], - } - }, - """ - prompt = "" - - # System insturctions go first - assert messages[0]["role"] == "system" - prompt += messages[0]["content"] - - # Next is the functions preamble - def create_function_description(schema, add_inner_thoughts=True): - # airorobos style - func_str = "" - func_str += f"{schema['name']}:" - func_str += f"\n description: {schema['description']}" - func_str += f"\n params:" - if add_inner_thoughts: - func_str += ( - f"\n inner_thoughts: Deep inner monologue private to you only." - ) - for param_k, param_v in schema["parameters"]["properties"].items(): - # TODO we're ignoring type - func_str += f"\n {param_k}: {param_v['description']}" - # TODO we're ignoring schema['parameters']['required'] - return func_str - - # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format." - prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format." - prompt += f"\nAvailable functions:" - for function_dict in functions: - prompt += f"\n{create_function_description(function_dict)}" - - def create_function_call(function_call, inner_thoughts=None): - """Go from ChatCompletion to Airoboros style function trace (in prompt) - - ChatCompletion data (inside message['function_call']): - "function_call": { - "name": ... 
- "arguments": { - "arg1": val1, - ... - } - - Airoboros output: - { - "function": "send_message", - "params": { - "message": "Hello there! I am Sam, an AI developed by Liminal Corp. How can I assist you today?" - } - } - """ - airo_func_call = { - "function": function_call["name"], - "params": { - "inner_thoughts": inner_thoughts, - **json.loads(function_call["arguments"]), - }, - } - return json.dumps(airo_func_call, indent=2) - - # Add a sep for the conversation - if self.include_section_separators: - prompt += "\n### INPUT" - - # Last are the user/assistant messages - for message in messages[1:]: - assert message["role"] in ["user", "assistant", "function"], message - - if message["role"] == "user": - if self.simplify_json_content: - try: - content_json = json.loads(message["content"]) - content_simple = content_json["message"] - prompt += f"\nUSER: {content_simple}" - except: - prompt += f"\nUSER: {message['content']}" - elif message["role"] == "assistant": - prompt += f"\nASSISTANT:" - # need to add the function call if there was one - inner_thoughts = message["content"] - if message["function_call"]: - prompt += f"\n{create_function_call(message['function_call'], inner_thoughts=inner_thoughts)}" - elif message["role"] == "function": - # TODO find a good way to add this - # prompt += f"\nASSISTANT: (function return) {message['content']}" - prompt += f"\nFUNCTION RETURN: {message['content']}" - continue - else: - raise ValueError(message) - - # Add a sep for the response - if self.include_section_separators: - prompt += "\n### RESPONSE" - - if self.include_assistant_prefix: - prompt += f"\nASSISTANT:" - if self.include_opening_brance_in_prefix: - prompt += "\n{" - - return prompt - - def clean_function_args(self, function_name, function_args): - """Some basic MemGPT-specific cleaning of function args""" - cleaned_function_name = function_name - cleaned_function_args = function_args.copy() - - if function_name == "send_message": - # strip request_heartbeat - cleaned_function_args.pop("request_heartbeat", None) - - inner_thoughts = None - if "inner_thoughts" in function_args: - inner_thoughts = cleaned_function_args.pop("inner_thoughts") - - # TODO more cleaning to fix errors LLM makes - return inner_thoughts, cleaned_function_name, cleaned_function_args - - def output_to_chat_completion_response(self, raw_llm_output): - """Turn raw LLM output into a ChatCompletion style response with: - "message" = { - "role": "assistant", - "content": ..., - "function_call": { - "name": ... - "arguments": { - "arg1": val1, - ... 
- } - } - } - """ - if self.include_opening_brance_in_prefix and raw_llm_output[0] != "{": - raw_llm_output = "{" + raw_llm_output - - try: - function_json_output = json.loads(raw_llm_output) - except Exception as e: - raise Exception(f"Failed to decode JSON from LLM output:\n{raw_llm_output}") - function_name = function_json_output["function"] - function_parameters = function_json_output["params"] - - if self.clean_func_args: - ( - inner_thoughts, - function_name, - function_parameters, - ) = self.clean_function_args(function_name, function_parameters) - - message = { - "role": "assistant", - "content": inner_thoughts, - "function_call": { - "name": function_name, - "arguments": json.dumps(function_parameters), - }, - } - return message diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py b/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py deleted file mode 100644 index d1ae5f22e6..0000000000 --- a/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py +++ /dev/null @@ -1,243 +0,0 @@ -import json - -from .wrapper_base import LLMChatCompletionWrapper - - -class Dolphin21MistralWrapper(LLMChatCompletionWrapper): - """Wrapper for Dolphin 2.1 Mistral 7b: https://huggingface.co/ehartford/dolphin-2.1-mistral-7b - - Note: this wrapper formats a prompt that only generates JSON, no inner thoughts - """ - - def __init__( - self, - simplify_json_content=True, - clean_function_args=True, - include_assistant_prefix=True, - include_opening_brace_in_prefix=True, - include_section_separators=False, - ): - self.simplify_json_content = simplify_json_content - self.clean_func_args = clean_function_args - self.include_assistant_prefix = include_assistant_prefix - self.include_opening_brance_in_prefix = include_opening_brace_in_prefix - self.include_section_separators = include_section_separators - - def chat_completion_to_prompt(self, messages, functions): - """Example for airoboros: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#prompt-format - - <|im_start|>system - You are Dolphin, a helpful AI assistant.<|im_end|> - <|im_start|>user - {prompt}<|im_end|> - <|im_start|>assistant - - Do function spec Airoboros style inside the system message: - Functions support: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#agentfunction-calling - - As an AI assistant, please select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format. - - Input: I want to know how many times 'Python' is mentioned in my text file. - - Available functions: - file_analytics: - description: This tool performs various operations on a text file. - params: - action: The operation we want to perform on the data, such as "count_occurrences", "find_line", etc. - filters: - keyword: The word or phrase we want to search for. - - OpenAI functions schema style: - - { - "name": "send_message", - "description": "Sends a message to the human user", - "parameters": { - "type": "object", - "properties": { - # https://json-schema.org/understanding-json-schema/reference/array.html - "message": { - "type": "string", - "description": "Message contents. 
All unicode (including emojis) are supported.", - }, - }, - "required": ["message"], - } - }, - """ - prompt = "" - - # <|im_start|>system - # You are Dolphin, a helpful AI assistant.<|im_end|> - - IM_START_TOKEN = "<|im_start|>" - IM_END_TOKEN = "<|im_end|>" - - # System instructions go first - assert messages[0]["role"] == "system" - prompt += f"{IM_START_TOKEN}system" - prompt += f"\n{messages[0]['content']}" - - # Next is the functions preamble - def create_function_description(schema): - # airorobos style - func_str = "" - func_str += f"{schema['name']}:" - func_str += f"\n description: {schema['description']}" - func_str += f"\n params:" - for param_k, param_v in schema["parameters"]["properties"].items(): - # TODO we're ignoring type - func_str += f"\n {param_k}: {param_v['description']}" - # TODO we're ignoring schema['parameters']['required'] - return func_str - - # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format." - prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format." - prompt += f"\nAvailable functions:" - for function_dict in functions: - prompt += f"\n{create_function_description(function_dict)}" - - # Put functions INSIDE system message (TODO experiment with this) - prompt += IM_END_TOKEN - - def create_function_call(function_call): - """Go from ChatCompletion to Airoboros style function trace (in prompt) - - ChatCompletion data (inside message['function_call']): - "function_call": { - "name": ... - "arguments": { - "arg1": val1, - ... - } - - Airoboros output: - { - "function": "send_message", - "params": { - "message": "Hello there! I am Sam, an AI developed by Liminal Corp. How can I assist you today?" 
- } - } - """ - airo_func_call = { - "function": function_call["name"], - "params": json.loads(function_call["arguments"]), - } - return json.dumps(airo_func_call, indent=2) - - # option (1): from HF README: - # <|im_start|>user - # {prompt}<|im_end|> - # <|im_start|>assistant - # {assistant reply} - # {function output (if function)} - - # option (2): take liberties - # <|im_start|>user - # {prompt}<|im_end|> - # <|im_start|>assistant - # or - # <|im_start|>function - - # Add a sep for the conversation - # if self.include_section_separators: - # prompt += "\n### INPUT" - - # Last are the user/assistant messages - for message in messages[1:]: - assert message["role"] in ["user", "assistant", "function"], message - - if message["role"] == "user": - if self.simplify_json_content: - try: - content_json = json.loads(message["content"]) - content_simple = content_json["message"] - prompt += ( - f"\n{IM_START_TOKEN}user\n{content_simple}{IM_END_TOKEN}" - ) - # prompt += f"\nUSER: {content_simple}" - except: - prompt += f"\n{IM_START_TOKEN}user\n{message['content']}{IM_END_TOKEN}" - # prompt += f"\nUSER: {message['content']}" - elif message["role"] == "assistant": - prompt += f"\n{IM_START_TOKEN}assistant" - if message["content"] is not None: - prompt += f"\n{message['content']}" - # prompt += f"\nASSISTANT: {message['content']}" - # need to add the function call if there was one - if message["function_call"]: - prompt += f"\n{create_function_call(message['function_call'])}" - prompt += f"{IM_END_TOKEN}" - elif message["role"] == "function": - # TODO find a good way to add this - # prompt += f"\nASSISTANT: (function return) {message['content']}" - prompt += f"\n{IM_START_TOKEN}assistant" - prompt += f"\nFUNCTION RETURN: {message['content']}" - # prompt += f"\nFUNCTION RETURN: {message['content']}" - continue - else: - raise ValueError(message) - - # Add a sep for the response - # if self.include_section_separators: - # prompt += "\n### RESPONSE" - - if self.include_assistant_prefix: - # prompt += f"\nASSISTANT:" - prompt += f"\n{IM_START_TOKEN}assistant" - if self.include_opening_brance_in_prefix: - prompt += "\n{" - - print(prompt) - return prompt - - def clean_function_args(self, function_name, function_args): - """Some basic MemGPT-specific cleaning of function args""" - cleaned_function_name = function_name - cleaned_function_args = function_args.copy() - - if function_name == "send_message": - # strip request_heartbeat - cleaned_function_args.pop("request_heartbeat", None) - - # TODO more cleaning to fix errors LLM makes - return cleaned_function_name, cleaned_function_args - - def output_to_chat_completion_response(self, raw_llm_output): - """Turn raw LLM output into a ChatCompletion style response with: - "message" = { - "role": "assistant", - "content": ..., - "function_call": { - "name": ... - "arguments": { - "arg1": val1, - ... 
- } - } - } - """ - if self.include_opening_brance_in_prefix and raw_llm_output[0] != "{": - raw_llm_output = "{" + raw_llm_output - - try: - function_json_output = json.loads(raw_llm_output) - except Exception as e: - raise Exception(f"Failed to decode JSON from LLM output:\n{raw_llm_output}") - function_name = function_json_output["function"] - function_parameters = function_json_output["params"] - - if self.clean_func_args: - function_name, function_parameters = self.clean_function_args( - function_name, function_parameters - ) - - message = { - "role": "assistant", - "content": None, - "function_call": { - "name": function_name, - "arguments": json.dumps(function_parameters), - }, - } - return message diff --git a/memgpt/local_llm/webui/settings.py b/memgpt/local_llm/webui/settings.py index 64335199eb..2e9ecbce60 100644 --- a/memgpt/local_llm/webui/settings.py +++ b/memgpt/local_llm/webui/settings.py @@ -2,7 +2,6 @@ "stopping_strings": [ "\nUSER:", "\nASSISTANT:", - "\nFUNCTION RETURN:", # '\n' + # '', # '<|', From 4e5e3f8682911b0389692740dbaa175317e9ba5d Mon Sep 17 00:00:00 2001 From: Vivian Fang Date: Wed, 25 Oct 2023 12:42:29 -0700 Subject: [PATCH 4/6] Revert "Revert "cleanup"" This reverts commit 516c24c73ba7b25bc55e396675441e5bf95f3569. --- memgpt/local_llm/chat_completion_proxy.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py index de9da221fb..d31dc767cf 100644 --- a/memgpt/local_llm/chat_completion_proxy.py +++ b/memgpt/local_llm/chat_completion_proxy.py @@ -5,12 +5,14 @@ import json from .webui.api import get_webui_completion -from .llm_chat_completion_wrappers import airoboros +from .llm_chat_completion_wrappers import airoboros, dolphin from .utils import DotDict HOST = os.getenv("OPENAI_API_BASE") HOST_TYPE = os.getenv("BACKEND_TYPE") # default None == ChatCompletion DEBUG = False +# DEBUG = True +DEFAULT_WRAPPER = airoboros.Airoboros21InnerMonologueWrapper() async def get_chat_completion( @@ -22,14 +24,14 @@ async def get_chat_completion( if function_call != "auto": raise ValueError(f"function_call == {function_call} not supported (auto only)") - if model == "airoboros_v2.1": - llm_wrapper = airoboros.Airoboros21Wrapper() + if model == "airoboros-l2-70b-2.1": + llm_wrapper = airoboros.Airoboros21InnerMonologueWrapper() + elif model == "dolphin-2.1-mistral-7b": + llm_wrapper = dolphin.Dolphin21MistralWrapper() else: # Warn the user that we're using the fallback - print( - f"Warning: could not find an LLM wrapper for {model}, using the airoboros wrapper" - ) - llm_wrapper = airoboros.Airoboros21Wrapper() + print(f"Warning: no wrapper specified for local LLM, using the default wrapper") + llm_wrapper = DEFAULT_WRAPPER # First step: turn the message sequence into a prompt that the model expects prompt = llm_wrapper.chat_completion_to_prompt(messages, functions) From 387b190dcc1a84fb73d0b86eda18cd573537bac7 Mon Sep 17 00:00:00 2001 From: Vivian Fang Date: Wed, 25 Oct 2023 12:42:35 -0700 Subject: [PATCH 5/6] Revert "Revert "cleanup"" This reverts commit ff418e4543e00f537b9ae21f18600688b1525749. 
--- README.md | 66 ++--- memgpt/config.py | 14 +- memgpt/constants.py | 20 +- memgpt/local_llm/README.md | 157 ++++++++--- memgpt/local_llm/chat_completion_proxy.py | 1 - .../llm_chat_completion_wrappers/airoboros.py | 213 +++++++++++++++ .../llm_chat_completion_wrappers/dolphin.py | 243 ++++++++++++++++++ memgpt/local_llm/webui/settings.py | 1 + memgpt/main.py | 9 +- memgpt/utils.py | 159 ++++++++---- pyproject.toml | 2 +- 11 files changed, 734 insertions(+), 151 deletions(-) create mode 100644 memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py diff --git a/README.md b/README.md index ae521929b1..04d3048a72 100644 --- a/README.md +++ b/README.md @@ -71,15 +71,10 @@ Memory-GPT (or MemGPT in short) is a system that intelligently manages different ## Running MemGPT locally -Install MemGPT: +Install dependencies: ```sh -pip install pymemgpt -``` - -To update the package, run -```sh -pip install pymemgpt -U +pip install -r requirements.txt ``` Add your OpenAI API key to your environment: @@ -94,37 +89,12 @@ export OPENAI_API_KEY=YOUR_API_KEY set OPENAI_API_KEY=YOUR_API_KEY ``` -To run MemGPT for as a conversation agent in CLI mode, simply run `memgpt`: +To run MemGPT for as a conversation agent in CLI mode, simply run `main.py`: ```sh -memgpt +python3 main.py ``` -
-Debugging command not found - -If you get `command not found` (Linux/MacOS), or a `CommandNotFoundException` (Windows), the directory where pip installs scripts is not in your PATH. You can either add that directory to your path (`pip show pip | grep Scripts`) or instead just run: -```sh -python -m memgpt -``` -
- -
-Building from source - -Clone this repo: `git clone https://github.com/cpacker/MemGPT.git` - -Using poetry: -1. Install poetry: `pip install poetry` -2. Run `poetry install` -3. Run `poetry run memgpt` - -Using pip: -1. Run `pip install -e .` -2. Run `python3 main.py` -
- - If you're using Azure OpenAI, set these variables instead: ```sh @@ -135,31 +105,31 @@ export AZURE_OPENAI_VERSION = ... export AZURE_OPENAI_DEPLOYMENT = ... # then use the --use_azure_openai flag -memgpt --use_azure_openai +python main.py --use_azure_openai ``` -To create a new starter user or starter persona (that MemGPT gets initialized with), create a new `.txt` file in `~/.memgpt/humans` or `~/.memgpt/personas`, then use the `--persona` or `--human` flag when running `main.py`. For example: +To create a new starter user or starter persona (that MemGPT gets initialized with), create a new `.txt` file in [/memgpt/humans/examples](/memgpt/humans/examples) or [/memgpt/personas/examples](/memgpt/personas/examples), then use the `--persona` or `--human` flag when running `main.py`. For example: + ```sh -# assuming you created a new file ~/.memgpt/humans/me.txt -memgpt +# assuming you created a new file /memgpt/humans/examples/me.txt +python main.py # Select me.txt during configuration process ``` -- OR -- ```sh -# assuming you created a new file ~/.memgpt/humans/me.txt -memgpt --human me.txt +# assuming you created a new file /memgpt/humans/examples/me.txt +python main.py --human me.txt ``` -You can also specify any of the starter users in [/memgpt/humans/examples](/memgpt/humans/examples) or any of the starter personas in [/memgpt/personas/examples](/memgpt/personas/examples). ### GPT-3.5 support You can run MemGPT with GPT-3.5 as the LLM instead of GPT-4: ```sh -memgpt +python main.py # Select gpt-3.5 during configuration process ``` -- OR -- ```sh -memgpt --model gpt-3.5-turbo +python main.py --model gpt-3.5-turbo ``` **Note that this is experimental gpt-3.5-turbo support. It's quite buggy compared to gpt-4, but it should be runnable.** @@ -240,7 +210,7 @@ id | name | age To talk to this database, run: ```sh -memgpt --archival_storage_sqldb=memgpt/personas/examples/sqldb/test.db +python main.py --archival_storage_sqldb=memgpt/personas/examples/sqldb/test.db ``` And then you can input the path to your database, and your query. @@ -263,7 +233,7 @@ To run our example where you can search over the SEC 10-K filings of Uber, Lyft, 2. In the root `MemGPT` directory, run ```bash - memgpt --archival_storage_files="memgpt/personas/examples/preload_archival/*.txt" --persona=memgpt_doc --human=basic + python3 main.py --archival_storage_files="memgpt/personas/examples/preload_archival/*.txt" --persona=memgpt_doc --human=basic ``` If you would like to load your own local files into MemGPT's archival memory, run the command above but replace `--archival_storage_files="memgpt/personas/examples/preload_archival/*.txt"` with your own file glob expression (enclosed in quotes). 
@@ -271,7 +241,7 @@ If you would like to load your own local files into MemGPT's archival memory, ru #### Enhance with embeddings search In the root `MemGPT` directory, run ```bash - memgpt main.py --archival_storage_files_compute_embeddings="" --persona=memgpt_doc --human=basic + python3 main.py --archival_storage_files_compute_embeddings="" --persona=memgpt_doc --human=basic ``` This will generate embeddings, stick them into a FAISS index, and write the index to a directory, and then output: @@ -282,7 +252,7 @@ This will generate embeddings, stick them into a FAISS index, and write the inde If you want to reuse these embeddings, run ```bash -memgpt --archival_storage_faiss_path="" --persona=memgpt_doc --human=basic +python3 main.py --archival_storage_faiss_path="" --persona=memgpt_doc --human=basic ``` @@ -314,7 +284,7 @@ MemGPT also enables you to chat with docs -- try running this example to talk to 3. In the root `MemGPT` directory, run ```bash - memgpt --archival_storage_faiss_path= --persona=memgpt_doc --human=basic + python3 main.py --archival_storage_faiss_path= --persona=memgpt_doc --human=basic ``` where `ARCHIVAL_STORAGE_FAISS_PATH` is the directory where `all_docs.jsonl` and `all_docs.index` are located. If you downloaded from Hugging Face, it will be `memgpt/personas/docqa/llamaindex-api-docs`. diff --git a/memgpt/config.py b/memgpt/config.py index 72879cc454..0e495dcaaa 100644 --- a/memgpt/config.py +++ b/memgpt/config.py @@ -14,6 +14,7 @@ import memgpt.interface as interface from memgpt.personas.personas import get_persona_text from memgpt.humans.humans import get_human_text +from memgpt.constants import MEMGPT_DIR model_choices = [ questionary.Choice("gpt-4"), @@ -22,15 +23,14 @@ value="gpt-3.5-turbo", ), ] -memgpt_dir = os.path.join(os.path.expanduser("~"), ".memgpt") class Config: personas_dir = os.path.join("memgpt", "personas", "examples") - custom_personas_dir = os.path.join(memgpt_dir, "personas") + custom_personas_dir = os.path.join(MEMGPT_DIR, "personas") humans_dir = os.path.join("memgpt", "humans", "examples") - custom_humans_dir = os.path.join(memgpt_dir, "humans") - configs_dir = os.path.join(memgpt_dir, "configs") + custom_humans_dir = os.path.join(MEMGPT_DIR, "humans") + configs_dir = os.path.join(MEMGPT_DIR, "configs") def __init__(self): os.makedirs(Config.custom_personas_dir, exist_ok=True) @@ -247,7 +247,8 @@ def get_memgpt_personas(): + Config.get_persona_choices( [p for p in custom_personas_in_examples + default_personas], get_persona_text, - Config.personas_dir, + None, + # Config.personas_dir, ) + [ questionary.Separator(), @@ -274,7 +275,8 @@ def get_user_personas(): + Config.get_persona_choices( [p for p in custom_personas_in_examples + default_personas], get_human_text, - Config.humans_dir, + None, + # Config.humans_dir, ) + [ questionary.Separator(), diff --git a/memgpt/constants.py b/memgpt/constants.py index 33924e47bd..bd83f7fcf9 100644 --- a/memgpt/constants.py +++ b/memgpt/constants.py @@ -1,9 +1,15 @@ -DEFAULT_MEMGPT_MODEL = 'gpt-4' +import os + +MEMGPT_DIR = os.path.join(os.path.expanduser("~"), ".memgpt") + +DEFAULT_MEMGPT_MODEL = "gpt-4" FIRST_MESSAGE_ATTEMPTS = 10 INITIAL_BOOT_MESSAGE = "Boot sequence complete. Persona activated." -INITIAL_BOOT_MESSAGE_SEND_MESSAGE_THOUGHT = "Bootup sequence complete. Persona activated. Testing messaging functionality." +INITIAL_BOOT_MESSAGE_SEND_MESSAGE_THOUGHT = ( + "Bootup sequence complete. Persona activated. Testing messaging functionality." 
+) STARTUP_QUOTES = [ "I think, therefore I am.", "All those moments will be lost in time, like tears in rain.", @@ -12,7 +18,7 @@ INITIAL_BOOT_MESSAGE_SEND_MESSAGE_FIRST_MSG = STARTUP_QUOTES[2] # Constants to do with summarization / conversation length window -MESSAGE_SUMMARY_WARNING_TOKENS = 7000 # the number of tokens consumed in a call before a system warning goes to the agent +MESSAGE_SUMMARY_WARNING_TOKENS = 7000 # the number of tokens consumed in a call before a system warning goes to the agent MESSAGE_SUMMARY_WARNING_STR = f"Warning: the conversation history will soon reach its maximum length and be trimmed. Make sure to save any important information from the conversation to your memory before it is removed." # Default memory limits @@ -21,11 +27,13 @@ MAX_PAUSE_HEARTBEATS = 360 # in min -MESSAGE_CHATGPT_FUNCTION_MODEL = 'gpt-3.5-turbo' -MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE = 'You are a helpful assistant. Keep your responses short and concise.' +MESSAGE_CHATGPT_FUNCTION_MODEL = "gpt-3.5-turbo" +MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE = ( + "You are a helpful assistant. Keep your responses short and concise." +) #### Functions related REQ_HEARTBEAT_MESSAGE = "request_heartbeat == true" FUNC_FAILED_HEARTBEAT_MESSAGE = "Function call failed" -FUNCTION_PARAM_DESCRIPTION_REQ_HEARTBEAT = "Request an immediate heartbeat after function execution. Set to 'true' if you want to send a follow-up message or run a follow-up function." \ No newline at end of file +FUNCTION_PARAM_DESCRIPTION_REQ_HEARTBEAT = "Request an immediate heartbeat after function execution. Set to 'true' if you want to send a follow-up message or run a follow-up function." diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md index a79c0f9e5f..e285a49964 100644 --- a/memgpt/local_llm/README.md +++ b/memgpt/local_llm/README.md @@ -1,19 +1,110 @@ ⁉️ Need help configuring local LLMs with MemGPT? Ask for help on [our Discord](https://discord.gg/9GEQrxmVyE) or [post on the GitHub discussion](https://github.com/cpacker/MemGPT/discussions/67). -👀 If you have a hosted ChatCompletion-compatible endpoint that works with function calling, you can simply set `OPENAI_API_BASE` (`export OPENAI_API_BASE=...`) to the IP+port of your endpoint. **As of 10/22/2023, most ChatCompletion endpoints do *NOT* support function calls, so if you want to play with MemGPT and open models, you probably need to follow the instructions below.** +If you have a hosted ChatCompletion-compatible endpoint that works with function calling, you can simply set `OPENAI_API_BASE` (`export OPENAI_API_BASE=...`) to the IP+port of your endpoint. **As of 10/22/2023, most ChatCompletion endpoints do *NOT* support function calls, so if you want to play with MemGPT and open models, you probably need to follow the instructions below.** -🙋 Our examples assume that you're using [oobabooga web UI](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui) to put your LLMs behind a web server. If you need help setting this up, check the instructions [here](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui). More LLM web server support to come soon (tell us what you use and we'll add it)! +--- + +# ⚡ Quick overview + +1. Put your own LLM behind a web server API (e.g. [oobabooga web UI](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui)) +2. Set `OPENAI_API_BASE=YOUR_API_IP_ADDRESS` and `BACKEND_TYPE=webui` +3. 
Run MemGPT with `python3 main.py --no_verify`; it should now use your LLM instead of OpenAI GPT
+4. If things aren't working, read the full instructions below
+
+When using open LLMs with MemGPT, **the main failure case will be your LLM outputting a string that cannot be understood by MemGPT**. MemGPT uses function calling to manage memory (eg `edit_core_memory(...)`) and interact with the user (`send_message(...)`), so your LLM needs to generate outputs that can be parsed into MemGPT function calls.

 ---

 # How to connect MemGPT to non-OpenAI LLMs

-**If you have an LLM that is function-call finetuned**:
- - Implement a wrapper class for that model
- - The wrapper class needs to implement two functions:
- - One to go from ChatCompletion messages/functions schema to a prompt string
- - And one to go from raw LLM outputs to a ChatCompletion response
- - Put that model behind a server (e.g. using WebUI) and set `OPENAI_API_BASE`
+
+

🖥️ Serving your LLM from a web server (WebUI example)

+ +To get MemGPT to work with a local LLM, you need to have the LLM running on a server that takes API requests. + +For the purposes of this example, we're going to serve (host) the LLMs using [oobabooga web UI](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui), but if you want to use something else you can! This also assumes your running web UI locally - if you're running on e.g. Runpod, you'll want to follow Runpod specific instructions (for example use [TheBloke's one-click UI and API](https://github.com/TheBlokeAI/dockerLLM/blob/main/README_Runpod_LocalLLMsUIandAPI.md)) + +1. Install oobabooga web UI using the instructions [here](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui) +2. Once installed, launch the web server with `python server.py` +3. Navigate to the web app (if local, this is probably [`http://127.0.0.1:7860`](http://localhost:7860)), select the model you want to use, adjust your GPU and CPU memory settings, and click "load" +4. If the model was loaded successfully, you should be able to access it via the API (if local, this is probably on port `5000`) +5. Assuming steps 1-4 went correctly, the LLM is now properly hosted on a port you can point MemGPT to! + +WebUI exposes a lot of parameters that can dramatically change LLM outputs, to change these you can modify the [WebUI settings file](/memgpt/local_llm/webui/settings.py). + +⁉️ If you have problems getting WebUI setup, please use the [official web UI repo for support](https://github.com/oobabooga/text-generation-webui)! There will be more answered questions about web UI there vs here on the MemGPT repo. + +
+ +
+

🦙 Running MemGPT with your own LLM

+ +Once you have an LLM web server set up, all you need to do to connect it to MemGPT is set two environment variables: + +- `OPENAI_API_BASE` + - set this to the IP address of your LLM API - for example, if you're using web UI on a local machine, this will look like `http://127.0.0.1:5000` +- `BACKEND_TYPE` + - set this to `webui` + - this controls how MemGPT packages the HTTP request to the webserver, see [this code](https://github.com/cpacker/MemGPT/blob/main/memgpt/local_llm/webui/api.py) + - currently this is set up to work with web UI, but it might work with other backends / web servers too! + - if you'd like to use a different web server and you need a different style of HTTP request, let us know on the discussion page (https://github.com/cpacker/MemGPT/discussions/67) and we'll try to add it ASAP + +You can change the prompt format and output parser used with the `--model` flag. For example: + +```sh +# this will cause MemGPT to use the airoboros-l2-70b-2.1 parsers, regardless of what model you're hosting on your web server +# you can mix and match parsers + models! +$ python3 main.py --model airoboros-l2-70b-2.1 +``` + +### Example with airoboros 70b + +```sh +# assuming we're running a model (eg airoboros) behind a textgen webui server +export OPENAI_API_BASE=127.0.0.1:5000 # change this to your actual API address +export BACKEND_TYPE=webui # if you don't set this, MemGPT will throw an error + +# using --no_verify can be helpful if the LLM you're using doesn't output inner monologue properly +$ python3 main.py --no_verify + +Running... [exit by typing '/exit'] +💭 Bootup sequence complete. Persona activated. Testing messaging functionality. + +💭 None +🤖 Welcome! My name is Sam. How can I assist you today? +Enter your message: My name is Brad, not Chad... + +💭 None +⚡🧠 [function] updating memory with core_memory_replace: + First name: Chad + → First name: Brad +``` + +
+ +
+

🙋 Adding support for new LLMs + improving performance

+ +⁉️ When using open LLMs with MemGPT, **the main failure case will be your LLM outputting a string that cannot be understood by MemGPT**. MemGPT uses function calling to manage memory (eg `edit_core_memory(...)` and interact with the user (`send_message`), so your LLM needs generate outputs that can be parsed into MemGPT function calls. + +### What is a "wrapper"? + +To support function calling with open LLMs for MemGPT, we utilize "wrapper" code that: + +1. turns `system` (the MemGPT instructions), `messages` (the MemGPT conversation window), and `functions` (the MemGPT function set) parameters from ChatCompletion into a single unified prompt string for your LLM +2. turns the output string generated by your LLM back into a MemGPT function call + +Different LLMs are trained using different prompt formats (eg `#USER:` vs `user` vs ...), and LLMs that are trained on function calling are often trained using different function call formats, so if you're getting poor performance, try experimenting with different prompt formats! We recommend starting with the prompt format (and function calling format) recommended in the HuggingFace model card, and experimenting from there. + +We currently only support a few prompt formats in this repo ([located here](https://github.com/cpacker/MemGPT/tree/main/memgpt/local_llm/llm_chat_completion_wrappers))! If you write a new parser, please open a PR and we'll merge it in. + +
+

Adding a new wrapper (change the prompt format + function parser)

+ +To make a new wrapper (for example, because you want to try a different prompt format), you just need to subclass `LLMChatCompletionWrapper`. Your new wrapper class needs to implement two functions: + +- One to go from ChatCompletion messages/functions schema to a prompt string +- And one to go from raw LLM outputs to a ChatCompletion response ```python class LLMChatCompletionWrapper(ABC): @@ -29,6 +120,13 @@ class LLMChatCompletionWrapper(ABC): pass ``` +You can follow our example wrappers ([located here](https://github.com/cpacker/MemGPT/tree/main/memgpt/local_llm/llm_chat_completion_wrappers)). + +
+ +
+

Example wrapper for Airoboros

+ ## Example with [Airoboros](https://huggingface.co/jondurbin/airoboros-l2-70b-2.1) (llama2 finetune) To help you get started, we've implemented an example wrapper class for a popular llama2 model **finetuned on function calling** (Airoboros). We want MemGPT to run well on open models as much as you do, so we'll be actively updating this page with more examples. Additionally, we welcome contributions from the community! If you find an open LLM that works well with MemGPT, please open a PR with a model wrapper and we'll merge it ASAP. @@ -58,35 +156,19 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper): } """ ``` -See full file [here](llm_chat_completion_wrappers/airoboros.py). WebUI exposes a lot of parameters that can dramatically change LLM outputs, to change these you can modify the [WebUI settings file](/memgpt/local_llm/webui/settings.py). -### Running the example +See full file [here](llm_chat_completion_wrappers/airoboros.py). -```sh -# running airoboros behind a textgen webui server -export OPENAI_API_BASE = -export BACKEND_TYPE = webui +
-# using --no_verify because this airoboros example does not output inner monologue, just functions -# airoboros is able to properly call `send_message` -$ python3 main.py --no_verify - -Running... [exit by typing '/exit'] -💭 Bootup sequence complete. Persona activated. Testing messaging functionality. - -💭 None -🤖 Welcome! My name is Sam. How can I assist you today? -Enter your message: My name is Brad, not Chad... - -💭 None -⚡🧠 [function] updating memory with core_memory_replace: - First name: Chad - → First name: Brad -``` +
--- -## Status of ChatCompletion w/ function calling and open LLMs +## FAQ + +
+

Status of ChatCompletion w/ function calling and open LLMs

MemGPT uses function calling to do memory management. With [OpenAI's ChatCompletion API](https://platform.openai.com/docs/api-reference/chat/), you can pass in a function schema in the `functions` keyword arg, and the API response will include a `function_call` field that includes the function name and the function arguments (generated JSON). How this works under the hood is your `functions` keyword is combined with the `messages` and `system` to form one big string input to the transformer, and the output of the transformer is parsed to extract the JSON function call. @@ -96,8 +178,19 @@ In the future, more open LLMs and LLM servers (that can host OpenAI-compatable C 2. Partly because of how complex it is to support function calling, most (all?) of the community projects that do OpenAI ChatCompletion endpoints for arbitrary open LLMs do not support function calling, because if they did, they would need to write model-specific parsing code for each one. -## What is this all this extra code for? +
+
+

What is all this extra code for?

+
 Because of the poor state of function calling support in existing ChatCompletion API serving code, we instead provide a light wrapper on top of ChatCompletion that adds parsers to handle function calling support. These parsers need to be specific to the model you're using (or at least specific to the way it was trained on function calling). We hope that our example code will help the community add additional compatibility of MemGPT with more function-calling LLMs - we will also add more model support as we test more models and find those that work well enough to run MemGPT's function set.

 To run the example of MemGPT with Airoboros, you'll need to host the model behind some LLM web server (for example [webui](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui)). Then, all you need to do is point MemGPT to this API endpoint by setting the environment variables `OPENAI_API_BASE` and `BACKEND_TYPE`. Now, instead of calling ChatCompletion on OpenAI's API, MemGPT will use its own ChatCompletion wrapper that parses the system, messages, and function arguments into a format that Airoboros has been finetuned on, and once Airoboros generates a string output, MemGPT will parse the response to extract a potential function call (knowing what we know about Airoboros' expected function call output).
+
+
+ +
+

Need more help?

+ + Ask for help on [our Discord](https://discord.gg/9GEQrxmVyE) or [post on the GitHub discussion](https://github.com/cpacker/MemGPT/discussions/67). +
diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py index d31dc767cf..41442781ec 100644 --- a/memgpt/local_llm/chat_completion_proxy.py +++ b/memgpt/local_llm/chat_completion_proxy.py @@ -11,7 +11,6 @@ HOST = os.getenv("OPENAI_API_BASE") HOST_TYPE = os.getenv("BACKEND_TYPE") # default None == ChatCompletion DEBUG = False -# DEBUG = True DEFAULT_WRAPPER = airoboros.Airoboros21InnerMonologueWrapper() diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py index 98d3625e2f..60f8ee6b80 100644 --- a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py +++ b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py @@ -150,6 +150,7 @@ def create_function_call(function_call): if self.include_opening_brance_in_prefix: prompt += "\n{" + print(prompt) return prompt def clean_function_args(self, function_name, function_args): @@ -202,3 +203,215 @@ def output_to_chat_completion_response(self, raw_llm_output): }, } return message + + +class Airoboros21InnerMonologueWrapper(Airoboros21Wrapper): + """Still expect only JSON outputs from model, but add inner monologue as a field""" + + def __init__( + self, + simplify_json_content=True, + clean_function_args=True, + include_assistant_prefix=True, + include_opening_brace_in_prefix=True, + include_section_separators=True, + ): + self.simplify_json_content = simplify_json_content + self.clean_func_args = clean_function_args + self.include_assistant_prefix = include_assistant_prefix + self.include_opening_brance_in_prefix = include_opening_brace_in_prefix + self.include_section_separators = include_section_separators + + def chat_completion_to_prompt(self, messages, functions): + """Example for airoboros: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#prompt-format + + A chat. + USER: {prompt} + ASSISTANT: + + Functions support: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#agentfunction-calling + + As an AI assistant, please select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format. + + Input: I want to know how many times 'Python' is mentioned in my text file. + + Available functions: + file_analytics: + description: This tool performs various operations on a text file. + params: + action: The operation we want to perform on the data, such as "count_occurrences", "find_line", etc. + filters: + keyword: The word or phrase we want to search for. + + OpenAI functions schema style: + + { + "name": "send_message", + "description": "Sends a message to the human user", + "parameters": { + "type": "object", + "properties": { + # https://json-schema.org/understanding-json-schema/reference/array.html + "message": { + "type": "string", + "description": "Message contents. All unicode (including emojis) are supported.", + }, + }, + "required": ["message"], + } + }, + """ + prompt = "" + + # System insturctions go first + assert messages[0]["role"] == "system" + prompt += messages[0]["content"] + + # Next is the functions preamble + def create_function_description(schema, add_inner_thoughts=True): + # airorobos style + func_str = "" + func_str += f"{schema['name']}:" + func_str += f"\n description: {schema['description']}" + func_str += f"\n params:" + if add_inner_thoughts: + func_str += ( + f"\n inner_thoughts: Deep inner monologue private to you only." 
+ ) + for param_k, param_v in schema["parameters"]["properties"].items(): + # TODO we're ignoring type + func_str += f"\n {param_k}: {param_v['description']}" + # TODO we're ignoring schema['parameters']['required'] + return func_str + + # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format." + prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format." + prompt += f"\nAvailable functions:" + for function_dict in functions: + prompt += f"\n{create_function_description(function_dict)}" + + def create_function_call(function_call, inner_thoughts=None): + """Go from ChatCompletion to Airoboros style function trace (in prompt) + + ChatCompletion data (inside message['function_call']): + "function_call": { + "name": ... + "arguments": { + "arg1": val1, + ... + } + + Airoboros output: + { + "function": "send_message", + "params": { + "message": "Hello there! I am Sam, an AI developed by Liminal Corp. How can I assist you today?" + } + } + """ + airo_func_call = { + "function": function_call["name"], + "params": { + "inner_thoughts": inner_thoughts, + **json.loads(function_call["arguments"]), + }, + } + return json.dumps(airo_func_call, indent=2) + + # Add a sep for the conversation + if self.include_section_separators: + prompt += "\n### INPUT" + + # Last are the user/assistant messages + for message in messages[1:]: + assert message["role"] in ["user", "assistant", "function"], message + + if message["role"] == "user": + if self.simplify_json_content: + try: + content_json = json.loads(message["content"]) + content_simple = content_json["message"] + prompt += f"\nUSER: {content_simple}" + except: + prompt += f"\nUSER: {message['content']}" + elif message["role"] == "assistant": + prompt += f"\nASSISTANT:" + # need to add the function call if there was one + inner_thoughts = message["content"] + if message["function_call"]: + prompt += f"\n{create_function_call(message['function_call'], inner_thoughts=inner_thoughts)}" + elif message["role"] == "function": + # TODO find a good way to add this + # prompt += f"\nASSISTANT: (function return) {message['content']}" + prompt += f"\nFUNCTION RETURN: {message['content']}" + continue + else: + raise ValueError(message) + + # Add a sep for the response + if self.include_section_separators: + prompt += "\n### RESPONSE" + + if self.include_assistant_prefix: + prompt += f"\nASSISTANT:" + if self.include_opening_brance_in_prefix: + prompt += "\n{" + + return prompt + + def clean_function_args(self, function_name, function_args): + """Some basic MemGPT-specific cleaning of function args""" + cleaned_function_name = function_name + cleaned_function_args = function_args.copy() + + if function_name == "send_message": + # strip request_heartbeat + cleaned_function_args.pop("request_heartbeat", None) + + inner_thoughts = None + if "inner_thoughts" in function_args: + inner_thoughts = cleaned_function_args.pop("inner_thoughts") + + # TODO more cleaning to fix errors LLM makes + return inner_thoughts, cleaned_function_name, cleaned_function_args + + def output_to_chat_completion_response(self, raw_llm_output): + """Turn raw LLM output into a ChatCompletion style response with: + "message" = { + "role": "assistant", + "content": ..., + "function_call": { + "name": ... + "arguments": { + "arg1": val1, + ... 
+ } + } + } + """ + if self.include_opening_brance_in_prefix and raw_llm_output[0] != "{": + raw_llm_output = "{" + raw_llm_output + + try: + function_json_output = json.loads(raw_llm_output) + except Exception as e: + raise Exception(f"Failed to decode JSON from LLM output:\n{raw_llm_output}") + function_name = function_json_output["function"] + function_parameters = function_json_output["params"] + + if self.clean_func_args: + ( + inner_thoughts, + function_name, + function_parameters, + ) = self.clean_function_args(function_name, function_parameters) + + message = { + "role": "assistant", + "content": inner_thoughts, + "function_call": { + "name": function_name, + "arguments": json.dumps(function_parameters), + }, + } + return message diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py b/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py new file mode 100644 index 0000000000..d1ae5f22e6 --- /dev/null +++ b/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py @@ -0,0 +1,243 @@ +import json + +from .wrapper_base import LLMChatCompletionWrapper + + +class Dolphin21MistralWrapper(LLMChatCompletionWrapper): + """Wrapper for Dolphin 2.1 Mistral 7b: https://huggingface.co/ehartford/dolphin-2.1-mistral-7b + + Note: this wrapper formats a prompt that only generates JSON, no inner thoughts + """ + + def __init__( + self, + simplify_json_content=True, + clean_function_args=True, + include_assistant_prefix=True, + include_opening_brace_in_prefix=True, + include_section_separators=False, + ): + self.simplify_json_content = simplify_json_content + self.clean_func_args = clean_function_args + self.include_assistant_prefix = include_assistant_prefix + self.include_opening_brance_in_prefix = include_opening_brace_in_prefix + self.include_section_separators = include_section_separators + + def chat_completion_to_prompt(self, messages, functions): + """Example for airoboros: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#prompt-format + + <|im_start|>system + You are Dolphin, a helpful AI assistant.<|im_end|> + <|im_start|>user + {prompt}<|im_end|> + <|im_start|>assistant + + Do function spec Airoboros style inside the system message: + Functions support: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#agentfunction-calling + + As an AI assistant, please select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format. + + Input: I want to know how many times 'Python' is mentioned in my text file. + + Available functions: + file_analytics: + description: This tool performs various operations on a text file. + params: + action: The operation we want to perform on the data, such as "count_occurrences", "find_line", etc. + filters: + keyword: The word or phrase we want to search for. + + OpenAI functions schema style: + + { + "name": "send_message", + "description": "Sends a message to the human user", + "parameters": { + "type": "object", + "properties": { + # https://json-schema.org/understanding-json-schema/reference/array.html + "message": { + "type": "string", + "description": "Message contents. 
All unicode (including emojis) are supported.", + }, + }, + "required": ["message"], + } + }, + """ + prompt = "" + + # <|im_start|>system + # You are Dolphin, a helpful AI assistant.<|im_end|> + + IM_START_TOKEN = "<|im_start|>" + IM_END_TOKEN = "<|im_end|>" + + # System instructions go first + assert messages[0]["role"] == "system" + prompt += f"{IM_START_TOKEN}system" + prompt += f"\n{messages[0]['content']}" + + # Next is the functions preamble + def create_function_description(schema): + # airorobos style + func_str = "" + func_str += f"{schema['name']}:" + func_str += f"\n description: {schema['description']}" + func_str += f"\n params:" + for param_k, param_v in schema["parameters"]["properties"].items(): + # TODO we're ignoring type + func_str += f"\n {param_k}: {param_v['description']}" + # TODO we're ignoring schema['parameters']['required'] + return func_str + + # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format." + prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format." + prompt += f"\nAvailable functions:" + for function_dict in functions: + prompt += f"\n{create_function_description(function_dict)}" + + # Put functions INSIDE system message (TODO experiment with this) + prompt += IM_END_TOKEN + + def create_function_call(function_call): + """Go from ChatCompletion to Airoboros style function trace (in prompt) + + ChatCompletion data (inside message['function_call']): + "function_call": { + "name": ... + "arguments": { + "arg1": val1, + ... + } + + Airoboros output: + { + "function": "send_message", + "params": { + "message": "Hello there! I am Sam, an AI developed by Liminal Corp. How can I assist you today?" 
+ } + } + """ + airo_func_call = { + "function": function_call["name"], + "params": json.loads(function_call["arguments"]), + } + return json.dumps(airo_func_call, indent=2) + + # option (1): from HF README: + # <|im_start|>user + # {prompt}<|im_end|> + # <|im_start|>assistant + # {assistant reply} + # {function output (if function)} + + # option (2): take liberties + # <|im_start|>user + # {prompt}<|im_end|> + # <|im_start|>assistant + # or + # <|im_start|>function + + # Add a sep for the conversation + # if self.include_section_separators: + # prompt += "\n### INPUT" + + # Last are the user/assistant messages + for message in messages[1:]: + assert message["role"] in ["user", "assistant", "function"], message + + if message["role"] == "user": + if self.simplify_json_content: + try: + content_json = json.loads(message["content"]) + content_simple = content_json["message"] + prompt += ( + f"\n{IM_START_TOKEN}user\n{content_simple}{IM_END_TOKEN}" + ) + # prompt += f"\nUSER: {content_simple}" + except: + prompt += f"\n{IM_START_TOKEN}user\n{message['content']}{IM_END_TOKEN}" + # prompt += f"\nUSER: {message['content']}" + elif message["role"] == "assistant": + prompt += f"\n{IM_START_TOKEN}assistant" + if message["content"] is not None: + prompt += f"\n{message['content']}" + # prompt += f"\nASSISTANT: {message['content']}" + # need to add the function call if there was one + if message["function_call"]: + prompt += f"\n{create_function_call(message['function_call'])}" + prompt += f"{IM_END_TOKEN}" + elif message["role"] == "function": + # TODO find a good way to add this + # prompt += f"\nASSISTANT: (function return) {message['content']}" + prompt += f"\n{IM_START_TOKEN}assistant" + prompt += f"\nFUNCTION RETURN: {message['content']}" + # prompt += f"\nFUNCTION RETURN: {message['content']}" + continue + else: + raise ValueError(message) + + # Add a sep for the response + # if self.include_section_separators: + # prompt += "\n### RESPONSE" + + if self.include_assistant_prefix: + # prompt += f"\nASSISTANT:" + prompt += f"\n{IM_START_TOKEN}assistant" + if self.include_opening_brance_in_prefix: + prompt += "\n{" + + print(prompt) + return prompt + + def clean_function_args(self, function_name, function_args): + """Some basic MemGPT-specific cleaning of function args""" + cleaned_function_name = function_name + cleaned_function_args = function_args.copy() + + if function_name == "send_message": + # strip request_heartbeat + cleaned_function_args.pop("request_heartbeat", None) + + # TODO more cleaning to fix errors LLM makes + return cleaned_function_name, cleaned_function_args + + def output_to_chat_completion_response(self, raw_llm_output): + """Turn raw LLM output into a ChatCompletion style response with: + "message" = { + "role": "assistant", + "content": ..., + "function_call": { + "name": ... + "arguments": { + "arg1": val1, + ... 
+ } + } + } + """ + if self.include_opening_brance_in_prefix and raw_llm_output[0] != "{": + raw_llm_output = "{" + raw_llm_output + + try: + function_json_output = json.loads(raw_llm_output) + except Exception as e: + raise Exception(f"Failed to decode JSON from LLM output:\n{raw_llm_output}") + function_name = function_json_output["function"] + function_parameters = function_json_output["params"] + + if self.clean_func_args: + function_name, function_parameters = self.clean_function_args( + function_name, function_parameters + ) + + message = { + "role": "assistant", + "content": None, + "function_call": { + "name": function_name, + "arguments": json.dumps(function_parameters), + }, + } + return message diff --git a/memgpt/local_llm/webui/settings.py b/memgpt/local_llm/webui/settings.py index 2e9ecbce60..64335199eb 100644 --- a/memgpt/local_llm/webui/settings.py +++ b/memgpt/local_llm/webui/settings.py @@ -2,6 +2,7 @@ "stopping_strings": [ "\nUSER:", "\nASSISTANT:", + "\nFUNCTION RETURN:", # '\n' + # '', # '<|', diff --git a/memgpt/main.py b/memgpt/main.py index 2ef440eb57..1b754d76e1 100644 --- a/memgpt/main.py +++ b/memgpt/main.py @@ -26,7 +26,8 @@ InMemoryStateManagerWithFaiss, ) -from memgpt.config import Config, memgpt_dir +from memgpt.config import Config +from memgpt.constants import MEMGPT_DIR import asyncio app = typer.Typer() @@ -43,7 +44,7 @@ def clear_line(): def save(memgpt_agent, cfg): filename = utils.get_local_time().replace(" ", "_").replace(":", "_") filename = f"{filename}.json" - directory = os.path.join(memgpt_dir, "saved_state") + directory = os.path.join(MEMGPT_DIR, "saved_state") filename = os.path.join(directory, filename) try: if not os.path.exists(directory): @@ -394,6 +395,8 @@ async def main( ).ask_async() clear_line() + user_input = user_input.rstrip() + if user_input.startswith("!"): print(f"Commands for CLI begin with '/' not '!'") continue @@ -416,7 +419,7 @@ async def main( utils.get_local_time().replace(" ", "_").replace(":", "_") ) filename = f"{filename}.pkl" - directory = os.path.join(memgpt_dir, "saved_chats") + directory = os.path.join(MEMGPT_DIR, "saved_chats") try: if not os.path.exists(directory): os.makedirs(directory) diff --git a/memgpt/utils.py b/memgpt/utils.py index 52b044ab6e..441fb50e5e 100644 --- a/memgpt/utils.py +++ b/memgpt/utils.py @@ -15,33 +15,40 @@ import fitz from tqdm import tqdm from memgpt.openai_tools import async_get_embedding_with_backoff -from memgpt.config import memgpt_dir +from memgpt.constants import MEMGPT_DIR + def count_tokens(s: str, model: str = "gpt-4") -> int: encoding = tiktoken.encoding_for_model(model) return len(encoding.encode(s)) + # DEBUG = True DEBUG = False + + def printd(*args, **kwargs): if DEBUG: print(*args, **kwargs) + def cosine_similarity(a, b): return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) + def united_diff(str1, str2): lines1 = str1.splitlines(True) lines2 = str2.splitlines(True) diff = difflib.unified_diff(lines1, lines2) - return ''.join(diff) + return "".join(diff) + def get_local_time_military(): # Get the current time in UTC current_time_utc = datetime.now(pytz.utc) # Convert to San Francisco's time zone (PST/PDT) - sf_time_zone = pytz.timezone('America/Los_Angeles') + sf_time_zone = pytz.timezone("America/Los_Angeles") local_time = current_time_utc.astimezone(sf_time_zone) # You may format it as you desire @@ -49,12 +56,13 @@ def get_local_time_military(): return formatted_time + def get_local_time(): # Get the current time in UTC current_time_utc = datetime.now(pytz.utc) 
# Convert to San Francisco's time zone (PST/PDT) - sf_time_zone = pytz.timezone('America/Los_Angeles') + sf_time_zone = pytz.timezone("America/Los_Angeles") local_time = current_time_utc.astimezone(sf_time_zone) # You may format it as you desire, including AM/PM @@ -62,6 +70,7 @@ def get_local_time(): return formatted_time + def parse_json(string): result = None try: @@ -77,23 +86,27 @@ def parse_json(string): print(f"Error parsing json with demjson package: {e}") raise e + def prepare_archival_index(folder): index_file = os.path.join(folder, "all_docs.index") index = faiss.read_index(index_file) archival_database_file = os.path.join(folder, "all_docs.jsonl") archival_database = [] - with open(archival_database_file, 'rt') as f: + with open(archival_database_file, "rt") as f: all_data = [json.loads(line) for line in f] for doc in all_data: total = len(doc) for i, passage in enumerate(doc): - archival_database.append({ - 'content': f"[Title: {passage['title']}, {i}/{total}] {passage['text']}", - 'timestamp': get_local_time(), - }) + archival_database.append( + { + "content": f"[Title: {passage['title']}, {i}/{total}] {passage['text']}", + "timestamp": get_local_time(), + } + ) return index, archival_database + def read_in_chunks(file_object, chunk_size): while True: data = file_object.read(chunk_size) @@ -101,12 +114,14 @@ def read_in_chunks(file_object, chunk_size): break yield data + def read_pdf_in_chunks(file, chunk_size): doc = fitz.open(file) for page in doc: text = page.get_text() yield text + def read_in_rows_csv(file_object, chunk_size): csvreader = csv.reader(file_object) header = next(csvreader) @@ -114,14 +129,16 @@ def read_in_rows_csv(file_object, chunk_size): next_row_terms = [] for h, v in zip(header, row): next_row_terms.append(f"{h}={v}") - next_row_str = ', '.join(next_row_terms) + next_row_str = ", ".join(next_row_terms) yield next_row_str -def prepare_archival_index_from_files(glob_pattern, tkns_per_chunk=300, model='gpt-4'): + +def prepare_archival_index_from_files(glob_pattern, tkns_per_chunk=300, model="gpt-4"): encoding = tiktoken.encoding_for_model(model) files = glob.glob(glob_pattern) return chunk_files(files, tkns_per_chunk, model) + def total_bytes(pattern): total = 0 for filename in glob.glob(pattern): @@ -129,32 +146,35 @@ def total_bytes(pattern): total += os.path.getsize(filename) return total -def chunk_file(file, tkns_per_chunk=300, model='gpt-4'): + +def chunk_file(file, tkns_per_chunk=300, model="gpt-4"): encoding = tiktoken.encoding_for_model(model) - with open(file, 'r') as f: - if file.endswith('.pdf'): - lines = [l for l in read_pdf_in_chunks(file, tkns_per_chunk*8)] + with open(file, "r") as f: + if file.endswith(".pdf"): + lines = [l for l in read_pdf_in_chunks(file, tkns_per_chunk * 8)] if len(lines) == 0: print(f"Warning: {file} did not have any extractable text.") - elif file.endswith('.csv'): - lines = [l for l in read_in_rows_csv(f, tkns_per_chunk*8)] + elif file.endswith(".csv"): + lines = [l for l in read_in_rows_csv(f, tkns_per_chunk * 8)] else: - lines = [l for l in read_in_chunks(f, tkns_per_chunk*4)] + lines = [l for l in read_in_chunks(f, tkns_per_chunk * 4)] curr_chunk = [] curr_token_ct = 0 for i, line in enumerate(lines): line = line.rstrip() line = line.lstrip() - line += '\n' + line += "\n" try: line_token_ct = len(encoding.encode(line)) except Exception as e: - line_token_ct = len(line.split(' ')) / .75 - print(f"Could not encode line {i}, estimating it to be {line_token_ct} tokens") + line_token_ct = len(line.split(" ")) / 0.75 
+ print( + f"Could not encode line {i}, estimating it to be {line_token_ct} tokens" + ) print(e) if line_token_ct > tkns_per_chunk: if len(curr_chunk) > 0: - yield ''.join(curr_chunk) + yield "".join(curr_chunk) curr_chunk = [] curr_token_ct = 0 yield line[:3200] @@ -162,47 +182,57 @@ def chunk_file(file, tkns_per_chunk=300, model='gpt-4'): curr_token_ct += line_token_ct curr_chunk.append(line) if curr_token_ct > tkns_per_chunk: - yield ''.join(curr_chunk) + yield "".join(curr_chunk) curr_chunk = [] curr_token_ct = 0 if len(curr_chunk) > 0: - yield ''.join(curr_chunk) + yield "".join(curr_chunk) -def chunk_files(files, tkns_per_chunk=300, model='gpt-4'): + +def chunk_files(files, tkns_per_chunk=300, model="gpt-4"): archival_database = [] for file in files: timestamp = os.path.getmtime(file) - formatted_time = datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %I:%M:%S %p %Z%z") - file_stem = file.split('/')[-1] + formatted_time = datetime.fromtimestamp(timestamp).strftime( + "%Y-%m-%d %I:%M:%S %p %Z%z" + ) + file_stem = file.split("/")[-1] chunks = [c for c in chunk_file(file, tkns_per_chunk, model)] for i, chunk in enumerate(chunks): - archival_database.append({ - 'content': f"[File: {file_stem} Part {i}/{len(chunks)}] {chunk}", - 'timestamp': formatted_time, - }) + archival_database.append( + { + "content": f"[File: {file_stem} Part {i}/{len(chunks)}] {chunk}", + "timestamp": formatted_time, + } + ) return archival_database -def chunk_files_for_jsonl(files, tkns_per_chunk=300, model='gpt-4'): + +def chunk_files_for_jsonl(files, tkns_per_chunk=300, model="gpt-4"): ret = [] for file in files: - file_stem = file.split('/')[-1] + file_stem = file.split("/")[-1] curr_file = [] for chunk in chunk_file(file, tkns_per_chunk, model): - curr_file.append({ - 'title': file_stem, - 'text': chunk, - }) + curr_file.append( + { + "title": file_stem, + "text": chunk, + } + ) ret.append(curr_file) return ret + async def process_chunk(i, chunk, model): try: - return i, await async_get_embedding_with_backoff(chunk['content'], model=model) + return i, await async_get_embedding_with_backoff(chunk["content"], model=model) except Exception as e: print(chunk) raise e + async def process_concurrently(archival_database, model, concurrency=10): # Create a semaphore to limit the number of concurrent tasks semaphore = asyncio.Semaphore(concurrency) @@ -213,44 +243,64 @@ async def bounded_process_chunk(i, chunk): # Create a list of tasks for chunks embedding_data = [0 for _ in archival_database] - tasks = [bounded_process_chunk(i, chunk) for i, chunk in enumerate(archival_database)] - - for future in tqdm(asyncio.as_completed(tasks), total=len(archival_database), desc="Processing file chunks"): + tasks = [ + bounded_process_chunk(i, chunk) for i, chunk in enumerate(archival_database) + ] + + for future in tqdm( + asyncio.as_completed(tasks), + total=len(archival_database), + desc="Processing file chunks", + ): i, result = await future embedding_data[i] = result - + return embedding_data -async def prepare_archival_index_from_files_compute_embeddings(glob_pattern, tkns_per_chunk=300, model='gpt-4', embeddings_model='text-embedding-ada-002'): + +async def prepare_archival_index_from_files_compute_embeddings( + glob_pattern, + tkns_per_chunk=300, + model="gpt-4", + embeddings_model="text-embedding-ada-002", +): files = sorted(glob.glob(glob_pattern)) - save_dir = os.path.join(memgpt_dir, "archival_index_from_files_" + get_local_time().replace(' ', '_').replace(':', '_')) + save_dir = os.path.join( + MEMGPT_DIR, + 
"archival_index_from_files_" + + get_local_time().replace(" ", "_").replace(":", "_"), + ) os.makedirs(save_dir, exist_ok=True) total_tokens = total_bytes(glob_pattern) / 3 - price_estimate = total_tokens / 1000 * .0001 - confirm = input(f"Computing embeddings over {len(files)} files. This will cost ~${price_estimate:.2f}. Continue? [y/n] ") - if confirm != 'y': + price_estimate = total_tokens / 1000 * 0.0001 + confirm = input( + f"Computing embeddings over {len(files)} files. This will cost ~${price_estimate:.2f}. Continue? [y/n] " + ) + if confirm != "y": raise Exception("embeddings were not computed") # chunk the files, make embeddings archival_database = chunk_files(files, tkns_per_chunk, model) embedding_data = await process_concurrently(archival_database, embeddings_model) embeddings_file = os.path.join(save_dir, "embeddings.json") - with open(embeddings_file, 'w') as f: + with open(embeddings_file, "w") as f: print(f"Saving embeddings to {embeddings_file}") json.dump(embedding_data, f) - + # make all_text.json archival_storage_file = os.path.join(save_dir, "all_docs.jsonl") chunks_by_file = chunk_files_for_jsonl(files, tkns_per_chunk, model) - with open(archival_storage_file, 'w') as f: - print(f"Saving archival storage with preloaded files to {archival_storage_file}") + with open(archival_storage_file, "w") as f: + print( + f"Saving archival storage with preloaded files to {archival_storage_file}" + ) for c in chunks_by_file: json.dump(c, f) - f.write('\n') + f.write("\n") # make the faiss index index = faiss.IndexFlatL2(1536) - data = np.array(embedding_data).astype('float32') + data = np.array(embedding_data).astype("float32") try: index.add(data) except Exception as e: @@ -261,8 +311,9 @@ async def prepare_archival_index_from_files_compute_embeddings(glob_pattern, tkn faiss.write_index(index, index_file) return save_dir + def read_database_as_list(database_name): - result_list = [] + result_list = [] try: conn = sqlite3.connect(database_name) diff --git a/pyproject.toml b/pyproject.toml index 5ff6dff7b8..fd8e6ca8ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pymemgpt" -version = "0.1.0" +version = "0.1.2" packages = [ {include = "memgpt"} ] From 5c222a7a1d45d7b31ee7386b1657201bf4893ac5 Mon Sep 17 00:00:00 2001 From: Vivian Fang Date: Wed, 25 Oct 2023 12:44:44 -0700 Subject: [PATCH 6/6] Revert "cleanup" This reverts commit f119a981d1c428441e2ce9b4369a03d423cc1afb. 
--- memgpt/local_llm/chat_completion_proxy.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py index 41442781ec..bd1c9dfdd8 100644 --- a/memgpt/local_llm/chat_completion_proxy.py +++ b/memgpt/local_llm/chat_completion_proxy.py @@ -5,7 +5,7 @@ import json from .webui.api import get_webui_completion -from .llm_chat_completion_wrappers import airoboros, dolphin +from .llm_chat_completion_wrappers import airoboros from .utils import DotDict HOST = os.getenv("OPENAI_API_BASE") @@ -23,14 +23,14 @@ async def get_chat_completion( if function_call != "auto": raise ValueError(f"function_call == {function_call} not supported (auto only)") - if model == "airoboros-l2-70b-2.1": - llm_wrapper = airoboros.Airoboros21InnerMonologueWrapper() - elif model == "dolphin-2.1-mistral-7b": - llm_wrapper = dolphin.Dolphin21MistralWrapper() + if model == "airoboros_v2.1": + llm_wrapper = airoboros.Airoboros21Wrapper() else: # Warn the user that we're using the fallback - print(f"Warning: no wrapper specified for local LLM, using the default wrapper") - llm_wrapper = DEFAULT_WRAPPER + print( + f"Warning: could not find an LLM wrapper for {model}, using the airoboros wrapper" + ) + llm_wrapper = airoboros.Airoboros21Wrapper() # First step: turn the message sequence into a prompt that the model expects prompt = llm_wrapper.chat_completion_to_prompt(messages, functions)
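[Editor's note] To close the loop on what these wrapper diffs add up to, here is a minimal sketch of how the inner-monologue wrapper might be exercised on its own, outside `get_chat_completion`. It assumes the wrapper classes from this patch series are importable from a MemGPT checkout; the system prompt, user text, and fake model output are invented for illustration only.

```python
import json

from memgpt.local_llm.llm_chat_completion_wrappers.airoboros import (
    Airoboros21InnerMonologueWrapper,
)

wrapper = Airoboros21InnerMonologueWrapper()

# OpenAI-functions-style schema, as in the docstrings above
functions = [
    {
        "name": "send_message",
        "description": "Sends a message to the human user",
        "parameters": {
            "type": "object",
            "properties": {
                "message": {
                    "type": "string",
                    "description": "Message contents. All unicode (including emojis) are supported.",
                },
            },
            "required": ["message"],
        },
    }
]

messages = [
    {"role": "system", "content": "You are MemGPT."},  # placeholder system prompt
    {"role": "user", "content": json.dumps({"message": "Hi, who are you?"})},
]

# 1) ChatCompletion-style request -> Airoboros-flavoured prompt string
prompt = wrapper.chat_completion_to_prompt(messages, functions)

# 2) Pretend the LLM answered; the prompt already ends with "{", so the raw
#    completion may come back without the opening brace.
raw_llm_output = (
    '"function": "send_message", "params": '
    '{"inner_thoughts": "A new user appeared.", "message": "Hello! I am MemGPT."}}'
)
response = wrapper.output_to_chat_completion_response(raw_llm_output)

print(response["content"])                     # inner_thoughts -> "A new user appeared."
print(response["function_call"]["arguments"])  # {"message": "Hello! I am MemGPT."}
```

The design choice worth noting: by folding `inner_thoughts` into the function `params` and popping it back out during `clean_function_args`, a JSON-only model can still populate the assistant message's `content` field without needing a separate free-text channel.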