Merge pull request #4 from cpacker/sql

Example SQL integration with MemGPT Co-authored-by: Shishir Patil <[email protected]>
letta-ai · Oct 17, 2023 · 895fa59 · 895fa59
2 parents 92c1b5e + e0ed9ca
commit 895fa59
Show file tree

Hide file tree

Showing 5 changed files with 92 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -12,20 +12,32 @@
 </div>
 
 <details open>
-  <summary><h2>Create perpetual chatbots 🤖 with self-editing memory!</h1></summary>
+  <summary><h2>🤖 Create perpetual chatbots with self-editing memory!</h2></summary>
   <div align="center">
     <br>
     <img src="https://memgpt.ai/assets/img/demo.gif" alt="MemGPT demo video" width="800">
   </div>
 </details>
 
-<details open>
-  <summary><h2>Chat with your data 🗃️ - try talking to the <a href="memgpt/personas/examples/docqa">LlamaIndex API docs</a>!</h1></summary>
+<details>
+ <summary><h2>🗃️ Chat with your data - talk to your SQL database or your local files!</h2></summary>
+  <strong>SQL Database</strong>
+  <div align="center">
+    <img src="https://memgpt.ai/assets/img/sql_demo.gif" alt="MemGPT demo video for sql search" width="800">
+  </div>
+  <strong>Local files</strong>
+  <div align="center">
+    <img src="https://memgpt.ai/assets/img/preload_archival_demo.gif" alt="MemGPT demo video for local file search" width="800">
+  </div>
+</details>
+
+<details>
+  <summary><h2>📄 You can also talk to docs - for example ask about <a href="memgpt/personas/examples/docqa">LlamaIndex</a>!</h2></summary>
   <div align="center">
     <img src="https://memgpt.ai/assets/img/docqa_demo.gif" alt="MemGPT demo video for llamaindex api docs search" width="800">
   </div>
   <details>
-  <summary><h3>ChatGPT (GPT-4) when asked the same question:</h3></summary>
+  <summary><b>ChatGPT (GPT-4) when asked the same question:</b></summary>
     <div align="center">
       <img src="https://memgpt.ai/assets/img/llama_index_gpt4.png" alt="GPT-4 when asked about llamaindex api docs" width="800">
     </div>
@@ -97,6 +109,8 @@ python main.py --human me.txt
   load in document database (backed by FAISS index)
 --archival_storage_files="<ARCHIVAL_STORAGE_FILES_GLOB>"
   pre-load files into archival memory
+--archival_storage_sqldb=<SQLDB_PATH>
+  load in SQL database
 ```
 
 ### Interactive CLI commands
@@ -122,6 +136,35 @@ While using MemGPT via the CLI you can run various commands:
   send a memory warning system message to the agent
 ```
 
+## Use MemGPT to talk to your Database!
+
+MemGPT's archival memory lets you load your database and talk to it! To motivate this use case, we have included a toy example.
+
+Consider the `test.db` already included in the repository.
+
+id	| name |	age
+--- | --- | ---
+1	| Alice |	30
+2	| Bob	 | 25
+3	| Charlie |	35
+
+To talk to this database, run:
+
+```sh
+python main.py --archival_storage_sqldb=memgpt/personas/examples/sqldb/test.db
+```
+
+And then you can input the path to your database, and your query.
+
+```python
+Please enter the path to the database. test.db
+...
+Enter your message: How old is Bob?
+...
+🤖 Bob is 25 years old.
+```
+
+
 ### Support
 
 * By default MemGPT will use `gpt-4`, so your API key will require `gpt-4` API access.

diff --git a/main.py b/main.py
@@ -26,6 +26,7 @@
 flags.DEFINE_boolean("debug", default=False, required=False, help="Use -debug to enable debugging output")
 flags.DEFINE_string("archival_storage_faiss_path", default="", required=False, help="Specify archival storage with FAISS index to load (a folder with a .index and .json describing documents to be loaded)")
 flags.DEFINE_string("archival_storage_files", default="", required=False, help="Specify files to pre-load into archival memory (glob pattern)")
+flags.DEFINE_string("archival_storage_sqldb", default="", required=False, help="Specify SQL database to pre-load into archival memory")
 
 
 def clear_line():
@@ -58,12 +59,26 @@ async def main():
     print_messages = interface.print_messages
     await print_messages(memgpt_agent.messages)
 
+
     counter = 0
     user_input = None
     skip_next_user_input = False
     user_message = None
     USER_GOES_FIRST = FLAGS.first
 
+    if FLAGS.archival_storage_sqldb:
+        if not os.path.exists(FLAGS.archival_storage_sqldb):
+            print(f"File {user_input} does not exist")
+            return
+        # Ingest data from file into archival storage
+        else:
+            print(f"Database found! Loading database into archival memory")
+            data_list = utils.read_database_as_list(FLAGS.archival_storage_sqldb)
+            user_message = f"Your archival memory has been loaded with a SQL database called {data_list[0]}, which contains schema {data_list[1]}. Remember to refer to this first while answering any user questions!"
+            for row in data_list:
+                await memgpt_agent.persistence_manager.archival_memory.insert(row)
+            print(f"Database loaded into archival memory.")
+
     if not USER_GOES_FIRST:
         console.input('[bold cyan]Hit enter to begin (will request first MemGPT message)[/bold cyan]')
         clear_line()
@@ -82,6 +97,7 @@ async def main():
 
             if user_input == "":
                 # no empty messages allowed
+                print("Empty input received. Try again!")
                 continue
 
             # Handle CLI commands

diff --git a/memgpt/personas/examples/sqldb/test.db b/memgpt/personas/examples/sqldb/test.db
diff --git a/memgpt/utils.py b/memgpt/utils.py
@@ -8,6 +8,7 @@
 import faiss
 import tiktoken
 import glob
+import sqlite3
 
 def count_tokens(s: str, model: str = "gpt-4") -> int:
     encoding = tiktoken.encoding_for_model(model)
@@ -137,3 +138,30 @@ def prepare_archival_index_from_files(glob_pattern, tkns_per_chunk=300, model='g
                 'timestamp': formatted_time,
             })
     return archival_database
+
+def read_database_as_list(database_name):
+    result_list = [] 
+
+    try:
+        conn = sqlite3.connect(database_name)
+        cursor = conn.cursor()
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+        table_names = cursor.fetchall()
+        for table_name in table_names:
+            cursor.execute(f"PRAGMA table_info({table_name[0]});")
+            schema_rows = cursor.fetchall()
+            columns = [row[1] for row in schema_rows]
+            cursor.execute(f"SELECT * FROM {table_name[0]};")
+            rows = cursor.fetchall()
+            result_list.append(f"Table: {table_name[0]}")  # Add table name to the list
+            schema_row = "\t".join(columns)
+            result_list.append(schema_row)
+            for row in rows:
+                data_row = "\t".join(map(str, row))
+                result_list.append(data_row)
+        conn.close()
+    except sqlite3.Error as e:
+        result_list.append(f"Error reading database: {str(e)}")
+    except Exception as e:
+        result_list.append(f"Error: {str(e)}")
+    return result_list
diff --git a/requirements.txt b/requirements.txt
@@ -11,3 +11,4 @@ numpy
 absl-py
 pybars3
 faiss-cpu
+sqlite3