From 8fc8c55f14af5353ec4179735e5771b47fec1c9d Mon Sep 17 00:00:00 2001
From: Shubham Naik
Date: Fri, 11 Oct 2024 15:51:14 -0700
Subject: [PATCH] chore: support alembic (#1867)

Co-authored-by: Shubham Naik
Co-authored-by: Sarah Wooders
---
 .github/workflows/migration-test.yml          |  34 +++++
 CONTRIBUTING.md                               |  16 +++
 alembic.ini                                   | 116 ++++++++++++++++++
 alembic/README                                |   1 +
 alembic/env.py                                |  84 +++++++++++++
 alembic/script.py.mako                        |  26 ++++
 ...505cc7eca9_create_a_baseline_migrations.py |  27 ++++
 letta/agent_store/db.py                       |  10 +-
 letta/base.py                                 |   3 +
 letta/metadata.py                             |   7 +-
 letta/server/server.py                        |  40 ++----
 poetry.lock                                   |  12 +-
 pyproject.toml                                |   1 +
 tests/test_client.py                          |   2 +-
 tests/test_new_client.py                      |  67 ----------
 15 files changed, 334 insertions(+), 112 deletions(-)
 create mode 100644 .github/workflows/migration-test.yml
 create mode 100644 alembic.ini
 create mode 100644 alembic/README
 create mode 100644 alembic/env.py
 create mode 100644 alembic/script.py.mako
 create mode 100644 alembic/versions/9a505cc7eca9_create_a_baseline_migrations.py
 create mode 100644 letta/base.py

diff --git a/.github/workflows/migration-test.yml b/.github/workflows/migration-test.yml
new file mode 100644
index 0000000000..9f04b6d2d5
--- /dev/null
+++ b/.github/workflows/migration-test.yml
@@ -0,0 +1,34 @@
+name: Alembic Migration Tester
+on:
+  pull_request:
+    paths:
+      - '**.py'
+  workflow_dispatch:
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Build and run container
+        run: bash db/run_postgres.sh
+
+      - name: "Setup Python, Poetry and Dependencies"
+        uses: packetcoders/action-setup-cache-python-poetry@main
+        with:
+          python-version: "3.12"
+          poetry-version: "1.8.2"
+          install-args: "--all-extras"
+      - name: Test alembic migration
+        env:
+          LETTA_PG_PORT: 8888
+          LETTA_PG_USER: letta
+          LETTA_PG_PASSWORD: letta
+          LETTA_PG_DB: letta
+          LETTA_PG_HOST: localhost
+          LETTA_SERVER_PASS: test_server_token
+        run: |
+          poetry run alembic upgrade head
+          poetry run alembic check
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 709ca15b80..c8b8e3989c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -65,6 +65,7 @@ $ . venv/bin/activate
 
 If you are having dependency issues using `pip`, we recommend you install the package using Poetry. Installing Letta from source using Poetry will ensure that you are using exact package versions that have been tested for the production build.
 
+
 #### (Optional) Installing pre-commit
 We recommend installing pre-commit to ensure proper formatting during development:
 ```
@@ -86,6 +87,21 @@ git checkout -b feature/your-feature
 
 Now, the world is your oyster! Go ahead and craft your fabulous changes. 🎨
 
+
+#### Handling Database Migrations
+If you are running Letta for the first time, your database will be set up automatically. If you are updating Letta, you may need to run migrations. To run migrations, use the following command:
+```shell
+poetry run alembic upgrade head
+```
+
+#### Creating a new Database Migration
+If you have made changes to the database models, you will need to create a new migration. To create a new migration, use the following command:
+```shell
+poetry run alembic revision --autogenerate -m "Your migration message here"
+```
+
+Visit the [Alembic documentation](https://alembic.sqlalchemy.org/en/latest/tutorial.html) for more information on creating and running migrations.
+
 ## 3. ✅ Testing
 Before we hit the 'Wow, I'm Done' button, let's make sure everything works as expected.
 Run tests and make sure the existing ones don't throw a fit. And if needed, create new tests. 🕵️
diff --git a/alembic.ini b/alembic.ini
new file mode 100644
index 0000000000..72cc69904f
--- /dev/null
+++ b/alembic.ini
@@ -0,0 +1,116 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts
+# Use forward slashes (/) also on windows to provide an os agnostic path
+script_location = alembic
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.
+prepend_sys_path = .
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the python>=3.9 or backports.zoneinfo library.
+# Any required deps can installed by adding `alembic[tz]` to the pip requirements
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to alembic/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "version_path_separator" below.
+# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
+
+# version path separator; As mentioned above, this is the character used to split
+# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+sqlalchemy.url = driver://user:pass@localhost/dbname
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts.  See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = %(here)s/.venv/bin/ruff
+# ruff.options = --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/alembic/README b/alembic/README
new file mode 100644
index 0000000000..2500aa1bcf
--- /dev/null
+++ b/alembic/README
@@ -0,0 +1 @@
+Generic single-database configuration.
diff --git a/alembic/env.py b/alembic/env.py
new file mode 100644
index 0000000000..69e3a60b55
--- /dev/null
+++ b/alembic/env.py
@@ -0,0 +1,84 @@
+import os
+from logging.config import fileConfig
+
+from sqlalchemy import engine_from_config, pool
+
+from alembic import context
+from letta.base import Base
+from letta.settings import settings
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+print(settings.letta_pg_uri_no_default)
+if settings.letta_pg_uri_no_default:
+    config.set_main_option("sqlalchemy.url", settings.letta_pg_uri)
+else:
+    config.set_main_option("sqlalchemy.url", "sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db"))
+
+# Interpret the config file for Python logging.
+# This line sets up loggers basically.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# add your model's MetaData object here
+# for 'autogenerate' support
+# from myapp import mymodel
+# target_metadata = mymodel.Base.metadata
target_metadata = Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well.  By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(connection=connection, target_metadata=target_metadata, include_schemas=True)
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/alembic/script.py.mako b/alembic/script.py.mako
new file mode 100644
index 0000000000..fbc4b07dce
--- /dev/null
+++ b/alembic/script.py.mako
@@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
diff --git a/alembic/versions/9a505cc7eca9_create_a_baseline_migrations.py b/alembic/versions/9a505cc7eca9_create_a_baseline_migrations.py
new file mode 100644
index 0000000000..d1ee25e1d7
--- /dev/null
+++ b/alembic/versions/9a505cc7eca9_create_a_baseline_migrations.py
@@ -0,0 +1,27 @@
+"""Create a baseline migrations
+
+Revision ID: 9a505cc7eca9
+Revises:
+Create Date: 2024-10-11 14:19:19.875656
+
+"""
+
+from typing import Sequence, Union
+
+# revision identifiers, used by Alembic.
+revision: str = "9a505cc7eca9"
+down_revision: Union[str, None] = None
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###
diff --git a/letta/agent_store/db.py b/letta/agent_store/db.py
index 585de6edee..ff22af8c4d 100644
--- a/letta/agent_store/db.py
+++ b/letta/agent_store/db.py
@@ -18,13 +18,14 @@
     select,
     text,
 )
-from sqlalchemy.orm import declarative_base, mapped_column
+from sqlalchemy.orm import mapped_column
 from sqlalchemy.orm.session import close_all_sessions
 from sqlalchemy.sql import func
 from sqlalchemy_json import MutableJson
 from tqdm import tqdm
 
 from letta.agent_store.storage import StorageConnector, TableType
+from letta.base import Base
 from letta.config import LettaConfig
 from letta.constants import MAX_EMBEDDING_DIM
 from letta.metadata import EmbeddingConfigColumn, ToolCallColumn
@@ -35,7 +36,6 @@
 from letta.schemas.passage import Passage
 from letta.settings import settings
 
-Base = declarative_base()
 config = LettaConfig()
 
 
@@ -560,3 +560,9 @@ def update(self, record):
 
             # Commit the changes to the database
             session.commit()
+
+
+def attach_base():
+    # This should be invoked in server.py to make sure Base gets initialized properly
+    # DO NOT REMOVE
+    print("Initializing database...")
diff --git a/letta/base.py b/letta/base.py
new file mode 100644
index 0000000000..860e54258a
--- /dev/null
+++ b/letta/base.py
@@ -0,0 +1,3 @@
+from sqlalchemy.ext.declarative import declarative_base
+
+Base = declarative_base()
diff --git a/letta/metadata.py b/letta/metadata.py
index 3e56fddbe3..c8f206f349 100644
--- a/letta/metadata.py
+++ b/letta/metadata.py
@@ -14,11 +14,10 @@
     String,
     TypeDecorator,
     desc,
-    func,
 )
-from sqlalchemy.orm import declarative_base
 from sqlalchemy.sql import func
 
+from letta.base import Base
 from letta.config import LettaConfig
 from letta.schemas.agent import AgentState
 from letta.schemas.api_key import APIKey
@@ -28,6 +27,8 @@
 from letta.schemas.job import Job
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import Memory
+
+# from letta.schemas.message import Message, Passage, Record, RecordType, ToolCall
 from letta.schemas.openai.chat_completions import ToolCall, ToolCallFunction
 from letta.schemas.organization import Organization
 from letta.schemas.source import Source
@@ -36,8 +37,6 @@ from letta.schemas.user import User
 from letta.settings import settings
 from letta.utils import enforce_types, get_utc_time, printd
 
-Base = declarative_base()
-
 
 class LLMConfigColumn(TypeDecorator):
     """Custom type for storing LLMConfig as JSON"""
diff --git a/letta/server/server.py b/letta/server/server.py
index b37ec867d4..efd16a784b 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -14,8 +14,8 @@
 import letta.server.utils as server_utils
 import letta.system as system
 from letta.agent import Agent, save_agent
+from letta.agent_store.db import attach_base
 from letta.agent_store.storage import StorageConnector, TableType
-from letta.config import LettaConfig
 from letta.credentials import LettaCredentials
 from letta.data_sources.connectors import DataConnector, load_data
@@ -41,7 +41,7 @@
 from letta.interface import CLIInterface  # for printing to terminal
 from letta.log import get_logger
 from letta.memory import get_memory_functions
-from letta.metadata import MetadataStore
+from letta.metadata import Base, MetadataStore
 from letta.prompts import gpt_system
 from letta.providers import (
     AnthropicProvider,
@@ -150,23 +150,11 @@ def run_command(self, user_id: str, agent_id: str, command: str) -> Union[str, N
 
 
 from sqlalchemy import create_engine
-from sqlalchemy.orm import declarative_base, sessionmaker
+from sqlalchemy.orm import sessionmaker
 
-from letta.agent_store.db import MessageModel, PassageModel
 from letta.config import LettaConfig
 
 # NOTE: hack to see if single session management works
-from letta.metadata import (
-    AgentModel,
-    AgentSourceMappingModel,
-    APIKeyModel,
-    BlockModel,
-    JobModel,
-    OrganizationModel,
-    SourceModel,
-    ToolModel,
-    UserModel,
-)
 from letta.settings import model_settings, settings
 
 config = LettaConfig.load()
@@ -183,24 +171,12 @@ def run_command(self, user_id: str, agent_id: str, command: str) -> Union[str, N
 
 # TODO: don't rely on config storage
 engine = create_engine("sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db"))
-Base = declarative_base()
+
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-Base.metadata.create_all(
-    engine,
-    tables=[
-        UserModel.__table__,
-        AgentModel.__table__,
-        SourceModel.__table__,
-        AgentSourceMappingModel.__table__,
-        APIKeyModel.__table__,
-        BlockModel.__table__,
-        ToolModel.__table__,
-        JobModel.__table__,
-        PassageModel.__table__,
-        MessageModel.__table__,
-        OrganizationModel.__table__,
-    ],
-)
+
+attach_base()
+
+Base.metadata.create_all(bind=engine)
 
 
 # Dependency
diff --git a/poetry.lock b/poetry.lock
index ae52f96686..011c932b38 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -139,13 +139,13 @@ frozenlist = ">=1.1.0"
 
 [[package]]
 name = "alembic"
-version = "1.13.2"
+version = "1.13.3"
 description = "A database migration tool for SQLAlchemy."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "alembic-1.13.2-py3-none-any.whl", hash = "sha256:6b8733129a6224a9a711e17c99b08462dbf7cc9670ba8f2e2ae9af860ceb1953"},
-    {file = "alembic-1.13.2.tar.gz", hash = "sha256:1ff0ae32975f4fd96028c39ed9bb3c867fe3af956bd7bb37343b54c9fe7445ef"},
+    {file = "alembic-1.13.3-py3-none-any.whl", hash = "sha256:908e905976d15235fae59c9ac42c4c5b75cfcefe3d27c0fbf7ae15a37715d80e"},
+    {file = "alembic-1.13.3.tar.gz", hash = "sha256:203503117415561e203aa14541740643a611f641517f0209fcae63e9fa09f1a2"},
 ]
 
 [package.dependencies]
@@ -3814,7 +3814,7 @@ Werkzeug = ">=2.0.0"
 name = "mako"
 version = "1.3.5"
 description = "A super-fast templating language that borrows the best ideas from the existing templating languages."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "Mako-1.3.5-py3-none-any.whl", hash = "sha256:260f1dbc3a519453a9c856dedfe4beb4e50bd5a26d96386cb6c80856556bb91a"},
@@ -8354,4 +8354,4 @@ tests = ["wikipedia"]
 [metadata]
 lock-version = "2.0"
 python-versions = "<3.13,>=3.10"
-content-hash = "aa0bbf5825741bdc9c06388e7e27c1d9a2d85d517abb7f51cca71cc8349d1170"
+content-hash = "2302d430ae353f5453bbf4223e9e00be38fcca45259de2924b38b14e36ab8024"
diff --git a/pyproject.toml b/pyproject.toml
index 38114897ad..124ce3e923 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -76,6 +76,7 @@ langchain = {version = "^0.2.16", optional = true}
 langchain-community = {version = "^0.2.17", optional = true}
 composio-langchain = "^0.5.28"
 composio-core = "^0.5.28"
+alembic = "^1.13.3"
 
 [tool.poetry.extras]
 #local = ["llama-index-embeddings-huggingface"]
diff --git a/tests/test_client.py b/tests/test_client.py
index 58c7775ec6..fe3e581544 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -385,7 +385,7 @@ def test_sources(client: Union[LocalClient, RESTClient], agent: AgentState):
     # list archival memory
     archival_memories = client.get_archival_memory(agent_id=agent.id)
     # print(archival_memories)
-    assert len(archival_memories) == created_passages
+    assert len(archival_memories) == created_passages, f"Mismatched length {len(archival_memories)} vs. {created_passages}"
 
     # check number of passages
     sources = client.list_sources()
diff --git a/tests/test_new_client.py b/tests/test_new_client.py
index 4a436d7fe5..395b6020e4 100644
--- a/tests/test_new_client.py
+++ b/tests/test_new_client.py
@@ -405,70 +405,3 @@ def test_tool_creation_langchain_missing_imports(client):
     # Intentionally missing {"langchain_community.utilities": "WikipediaAPIWrapper"}
     with pytest.raises(RuntimeError):
         Tool.from_langchain(langchain_tool)
-
-
-def test_sources(client, agent):
-    # list sources (empty)
-    sources = client.list_sources()
-    assert len(sources) == 0
-
-    # create a source
-    test_source_name = "test_source"
-    source = client.create_source(name=test_source_name)
-
-    # list sources
-    sources = client.list_sources()
-    assert len(sources) == 1
-    assert sources[0].metadata_["num_passages"] == 0
-    assert sources[0].metadata_["num_documents"] == 0
-
-    # update the source
-    original_id = source.id
-    original_name = source.name
-    new_name = original_name + "_new"
-    client.update_source(source_id=source.id, name=new_name)
-
-    # get the source name (check that it's been updated)
-    source = client.get_source(source_id=source.id)
-    assert source.name == new_name
-    assert source.id == original_id
-
-    # get the source id (make sure that it's the same)
-    assert str(original_id) == client.get_source_id(source_name=new_name)
-
-    # check agent archival memory size
-    archival_memories = client.get_archival_memory(agent_id=agent.id)
-    print(archival_memories)
-    assert len(archival_memories) == 0
-
-    # load a file into a source
-    filename = "CONTRIBUTING.md"
-    upload_job = client.load_file_into_source(filename=filename, source_id=source.id)
-    print("Upload job", upload_job, upload_job.status, upload_job.metadata_)
-
-    # TODO: make sure things run in the right order
-    archival_memories = client.get_archival_memory(agent_id=agent.id)
-    assert len(archival_memories) == 0
-
-    # attach a source
-    client.attach_source_to_agent(source_id=source.id, agent_id=agent.id)
-
-    # list archival memory
-    archival_memories = client.get_archival_memory(agent_id=agent.id)
-    # print(archival_memories)
-    assert len(archival_memories) == 20 or len(archival_memories) == 21
-
-    # check number of passages
-    sources = client.list_sources()
-
-    # TODO: do we want to add this metadata back?
-    # assert sources[0].metadata_["num_passages"] > 0
-    # assert sources[0].metadata_["num_documents"] == 0  # TODO: fix this once document store added
-    print(sources)
-
-    # detach the source
-    # TODO: add when implemented
-    # client.detach_source(source.name, agent.id)
-
-    # delete the source
-    client.delete_source(source.id)
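
Note on the shared declarative base this patch introduces (an explanatory sketch, not part of the diff above): letta/agent_store/db.py, letta/metadata.py, and letta/server/server.py now all import the single Base defined in letta/base.py, so that Alembic's autogenerate and Base.metadata.create_all see every table registered on one MetaData object, and, per its in-code comment, attach_base() is called from server.py only to force those model modules to be imported before create_all runs. The snippet below illustrates that registration pattern with a hypothetical ExampleModel rather than any real letta table:

```python
# Sketch of the shared-Base pattern (hypothetical model, not a letta table).
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()  # plays the role of letta/base.py


class ExampleModel(Base):
    # Defining (i.e. importing) the model is what registers its table on Base.metadata.
    __tablename__ = "example"
    id = Column(Integer, primary_key=True)
    name = Column(String)


engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(bind=engine)  # creates every table registered on Base
print(sorted(Base.metadata.tables))    # ['example']
```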