spack · alecbcs · Aug 20, 2024 · Aug 20, 2024 · Aug 20, 2024
diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,6 @@ __pycache__
 .env
 spack.lock
 .spack-env
-db/*.db
+*.db
 .coverage
 htmlcov
diff --git a/Dockerfile b/Dockerfile
@@ -16,6 +16,6 @@ RUN /venv/bin/pip install --disable-pip-version-check -r /requirements.txt
 FROM gcr.io/distroless/python3-debian12:latest
 COPY --from=build /venv /venv
 COPY ./gantry /app/gantry
-COPY ./db /app/db
+COPY ./migrations /app/migrations
 WORKDIR /app
 ENTRYPOINT ["/venv/bin/python", "-m", "gantry"]
diff --git a/docs/data-collection.md b/docs/data-collection.md
@@ -4,7 +4,7 @@ Job metadata is retrieved through the Spack Prometheus service (https://promethe
 
 Gantry exposes a webhook handler at `/v1/collection` which will accept a job status payload from Gitlab and collect build attributes and usage, submitting to the database.
 
-See `/db/schema.sql` for a full list of the data that is being collected.
+See the `migrations` folder for the complete database schema.
 
 ## Units
 

diff --git a/docs/deploy.md b/docs/deploy.md
@@ -39,4 +39,8 @@ While most details are better suited to be documented with the cluster, there ar
 
 We have made an architectural decision to depend on SQLite as the database engine. Before you deploy Gantry into a cluster, you should ensure that the file will be backed up on a regular basis, in the case that unexpected circumstances corrupt your data. This can be achieved using [Litestream](https://litestream.io), which will continuously replicate the database with the storage provider of your choice. See the cluster configuration linked above for details.
 
-When you first deploy the application on the cluster, be sure to run `/db/init_db.py <db_path>` to initialize an SQLite file with default tables, and to apply any migrations.
+When first deployed, the application will create `$DB_FILE` if it doesn't already exist. It will not create missing directories in that path, so the parent directory must exist beforehand.
+
+**Migrations**
+
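+Changes to the database schema are stored in the `migrations/` directory. Follow the naming convention `000_name.sql` and register each new file as a `(filename, version)` entry at the end of the `migrations` list inside the `gantry/__main__.py:apply_migrations` function, using the next version number. Files that are not listed there are never applied.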
diff --git a/gantry/__main__.py b/gantry/__main__.py
@@ -16,13 +16,34 @@
 )
 
 
+async def apply_migrations(db: aiosqlite.Connection):
+    """Run every migration newer than the database's stored user_version."""
+    async with db.execute("PRAGMA user_version") as cursor:
+        row = await cursor.fetchone()
+        current_version = row[0]
+
+    migrations = [
+        # (filename, version) pairs, listed by hand so that the order is
+        # explicit and a file that merely lands in the migrations folder
+        # is never executed without being registered here
+        ("001_initial.sql", 1),
+    ]
+
+    # a migration is pending when its version is above the one read above;
+    # the path is relative, so the working directory must contain migrations/
+    for migration, version in migrations:
+        if current_version < version:
+            logger.info(f"Applying migration {migration}")
+            with open(f"migrations/{migration}", encoding="utf-8") as f:
+                await db.executescript(f.read())
+            # stamp the database so this script is skipped on the next start
+            await db.execute(f"PRAGMA user_version = {version}")
+            await db.commit()
+
+
 async def init_db(app: web.Application):
     db = await aiosqlite.connect(os.environ["DB_FILE"])
-    # create a database with the schema if it doesn't exist
-    # otherwise, this is a noop
-    with open("db/schema.sql") as f:
-        await db.executescript(f.read())
-    await db.commit()
+    await apply_migrations(db)  # bring the schema up to date before storing db on app
     app["db"] = db
     yield
     await db.close()

diff --git a/gantry/tests/conftest.py b/gantry/tests/conftest.py
@@ -3,14 +3,16 @@
 import aiosqlite
 import pytest
 
+from gantry.__main__ import apply_migrations
+
 
 @pytest.fixture
 async def db_conn():
     """
     In-memory sqlite connection ensures that the database is clean for each test
     """
     db = await aiosqlite.connect(":memory:")
-    with open("db/schema.sql") as f:
-        await db.executescript(f.read())
+    # build the schema the same way the application does: run all migrations
+    await apply_migrations(db)
     yield db
     await db.close()
diff --git a/db/schema.sql → migrations/001_initial.sql b/db/schema.sql → migrations/001_initial.sql