-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #487 from microbiomedata/432-migrations-implement-…
…bookkeeping-so-database-tells-us-which-schema-it-conforms-to Migrations: Implement bookkeeping
- Loading branch information
Showing
19 changed files
with
985 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
from typing import Optional | ||
from enum import Enum | ||
from datetime import datetime | ||
|
||
from pymongo import MongoClient | ||
from nmdc_schema.migrators.migrator_base import MigratorBase | ||
|
||
|
||
class MigrationEvent(str, Enum): | ||
r""" | ||
Enumeration of all migration events that can be recorded. | ||
Reference: https://docs.python.org/3.10/library/enum.html#others | ||
>>> MigrationEvent.MIGRATION_COMPLETED.value | ||
'MIGRATION_COMPLETED' | ||
>>> MigrationEvent.MIGRATION_STARTED.value | ||
'MIGRATION_STARTED' | ||
""" | ||
MIGRATION_STARTED = "MIGRATION_STARTED" | ||
MIGRATION_COMPLETED = "MIGRATION_COMPLETED" | ||
|
||
|
||
class Bookkeeper: | ||
r""" | ||
A class you can use to record migration-related events in a Mongo database. | ||
""" | ||
def __init__( | ||
self, | ||
mongo_client: MongoClient, | ||
database_name: str = "nmdc", | ||
collection_name: str = "_migration_events", | ||
view_name: str = "_migration_latest_schema_version", | ||
): | ||
self.database_name = database_name | ||
self.collection_name = collection_name | ||
self.view_name = view_name | ||
|
||
# Store references to the database and collection. | ||
self.db = mongo_client[self.database_name] | ||
self.collection = self.db.get_collection(self.collection_name) | ||
|
||
# Ensure the MongoDB view that indicates the current schema version, exists. | ||
self.ensure_current_schema_version_view_exists() | ||
|
||
@staticmethod | ||
def get_current_timestamp() -> str: | ||
r"""Returns an ISO 8601 timestamp (string) representing the current time in UTC.""" | ||
utc_now = datetime.utcnow() | ||
iso_utc_now = utc_now.isoformat() | ||
return iso_utc_now # e.g. "2024-02-21T04:31:03.115107" | ||
|
||
def record_migration_event( | ||
self, | ||
migrator: MigratorBase, | ||
event: MigrationEvent, | ||
to_schema_version: Optional[str] = None, | ||
) -> None: | ||
r""" | ||
Records a migration event in the collection. | ||
The `to_schema_version` parameter is independent of the `migrator` parameter because, even though the migrator | ||
does have a `.get_destination_version()` method, the string returned by that method is the one defined when the | ||
migrator was _written_, which is sometimes more generic than the one used for data validation when the migrator | ||
is _run_ (e.g. "1.2" as opposed to "1.2.3"). So, this method provides a means by which the calling code can, | ||
optionally, specify a more precise version identifier. | ||
""" | ||
|
||
# If a custom schema version identifier was specified, use that; otherwise, use the one built into the migrator. | ||
to_schema_version_str = migrator.get_destination_version() | ||
if to_schema_version is not None: | ||
to_schema_version_str = to_schema_version | ||
|
||
document = dict( | ||
created_at=self.get_current_timestamp(), | ||
event=event.value, | ||
from_schema_version=migrator.get_origin_version(), | ||
to_schema_version=to_schema_version_str, | ||
migrator_module=migrator.__module__, # name of the Python module in which the `Migrator` class is defined | ||
) | ||
self.collection.insert_one(document) | ||
|
||
def ensure_current_schema_version_view_exists(self) -> None: | ||
r""" | ||
Ensures the MongoDB view that indicates the current schema version, exists. | ||
References: | ||
- https://www.mongodb.com/community/forums/t/is-there-anyway-to-create-view-using-python/161363/2 | ||
- https://www.mongodb.com/docs/manual/reference/method/db.createView/#mongodb-method-db.createView | ||
- https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.create_collection | ||
""" | ||
if ( | ||
self.view_name not in self.db.list_collection_names() | ||
): # returns list of names of both collections and views | ||
agg_pipeline = [ | ||
# Sort the documents so the most recent one is first. | ||
{"$sort": {"created_at": -1}}, | ||
# Only preserve the first document. | ||
{"$limit": 1}, | ||
# Derive a simplified document for the view. | ||
# Examples: `{ "schema_version": "1.2.3" }` or `{ "schema_version": null }` | ||
{ | ||
"$project": { | ||
"_id": 0, # omit the `_id` field | ||
"schema_version": { # add this field based upon the migration status | ||
"$cond": { | ||
"if": { | ||
"$eq": [ | ||
"$event", | ||
MigrationEvent.MIGRATION_COMPLETED.value, | ||
] | ||
}, | ||
"then": "$to_schema_version", # database conforms to this version of the NMDC Schema | ||
"else": None, # database doesn't necessarily conform to any version of the NMDC Schema | ||
} | ||
}, | ||
} | ||
}, | ||
] | ||
self.db.create_collection( | ||
name=self.view_name, | ||
viewOn=self.collection_name, | ||
pipeline=agg_pipeline, | ||
check_exists=True, # only create the view if it doesn't already exist | ||
comment="The version of the NMDC Schema to which the database conforms.", | ||
) |
186 changes: 186 additions & 0 deletions
186
demo/metadata_migration/notebooks/manual_test_bookkeeper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
# Note: This module's name starts with `manual_test_` instead of `test_` to signify that its author (currently) expects | ||
# people to run it manually as opposed to via automated test infrastructure. Its author chose that route after a | ||
# GitHub Actions workflow that runs tests in `test_` modules kept failing due to a database access issue. | ||
|
||
from typing import Optional | ||
import unittest | ||
import re | ||
import os | ||
from datetime import datetime, timedelta | ||
|
||
from pymongo import MongoClient, timeout | ||
from pymongo.database import Database | ||
from nmdc_schema.migrators.migrator_base import MigratorBase | ||
|
||
from demo.metadata_migration.notebooks.bookkeeper import Bookkeeper, MigrationEvent | ||
|
||
# Consume environment variables. | ||
MONGO_HOST: str = os.getenv("MONGO_HOST", "localhost") | ||
MONGO_USER: Optional[str] = os.getenv("MONGO_USERNAME", None) | ||
MONGO_PASS: Optional[str] = os.getenv("MONGO_PASSWORD", None) | ||
MONGO_DATABASE_NAME: str = os.getenv("MONGO_TEST_DBNAME", "test-migration-bookkeeper") | ||
|
||
MONGO_TIMEOUT_DURATION: int = 3 # seconds | ||
|
||
|
||
class FakeMigrator(MigratorBase): | ||
_to_version = "A.B.C" | ||
_from_version = "X.Y.Z" | ||
|
||
def upgrade(self): | ||
pass | ||
|
||
|
||
class TestBookkeeper(unittest.TestCase): | ||
r""" | ||
Tests targeting the `Bookkeeper` class. | ||
You can format this file like this: | ||
$ python -m black demo/metadata_migration/notebooks/manual_test_bookkeeper.py | ||
You can start up a containerized MongoDB server like this: | ||
$ docker run --rm --detach --name mongo-test-migration-bookkeeper -p 27017:27017 mongo | ||
One that's running, other containers will be able to access it via: | ||
- host.docker.internal:27017 | ||
You can run these tests like this: | ||
$ python -m unittest -v demo/metadata_migration/notebooks/manual_test_bookkeeper.py | ||
Reference: https://docs.python.org/3/library/unittest.html#basic-example | ||
""" | ||
|
||
mongo_client: Optional[MongoClient] = None | ||
db: Optional[Database] = None | ||
|
||
def setUp(self) -> None: | ||
r""" | ||
Connects to the MongoDB server and gets a reference to the database. | ||
Note: This function runs before each test starts. | ||
""" | ||
|
||
# Connect to the MongoDB server and store a reference to the connection. | ||
self.mongo_client = MongoClient( | ||
host=MONGO_HOST, | ||
username=MONGO_USER, | ||
password=MONGO_PASS, | ||
) | ||
with timeout(MONGO_TIMEOUT_DURATION): | ||
# Try connecting to the database server. | ||
_ = self.mongo_client.server_info() | ||
db = self.mongo_client[MONGO_DATABASE_NAME] | ||
|
||
# Ensure the database contains no collections. | ||
if len(db.list_collection_names()): | ||
raise KeyError(f"Database is not empty: {MONGO_DATABASE_NAME}") | ||
|
||
# Store a reference to the database. | ||
self.db = db | ||
|
||
def tearDown(self) -> None: | ||
r""" | ||
Drops all collections in the database and closes the connection to the MongoDB server. | ||
Note: This function runs after each test finishes. | ||
""" | ||
|
||
# Drop all collections in the database. | ||
for collection_name in self.db.list_collection_names(): | ||
self.db.drop_collection(collection_name) | ||
|
||
# Close the connection to the server. | ||
self.mongo_client.close() | ||
|
||
def test_get_current_timestamp(self): | ||
pattern = r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}$" | ||
ts_str = Bookkeeper.get_current_timestamp() | ||
|
||
# Verify the timestamp is a string having a valid format. | ||
self.assertIsInstance(ts_str, str) | ||
self.assertTrue(re.match(pattern, ts_str)) | ||
|
||
# Verify the moment represented by the timestamp was within the past minute. | ||
ts = datetime.fromisoformat(ts_str) | ||
now_ts = datetime.now() | ||
time_difference = now_ts - ts | ||
self.assertLess(time_difference, timedelta(minutes=1)) | ||
|
||
def test_init_method(self): | ||
# Confirm the view does not exist yet. | ||
view_name = "test_view" | ||
self.assertFalse(view_name in self.db.list_collection_names()) | ||
|
||
# Instantiate the class-under-test. | ||
_ = Bookkeeper( | ||
mongo_client=self.mongo_client, | ||
database_name=MONGO_DATABASE_NAME, | ||
view_name=view_name, | ||
) | ||
|
||
# Confirm the view exists now. | ||
self.assertTrue(view_name in self.db.list_collection_names()) | ||
|
||
def test_record_migration_event(self): | ||
# Instantiate a bookkeeper. | ||
bk = Bookkeeper( | ||
mongo_client=self.mongo_client, | ||
database_name=MONGO_DATABASE_NAME, | ||
) | ||
|
||
# Verify the collection is empty. | ||
collection = self.db.get_collection(bk.collection_name) | ||
self.assertTrue(collection.count_documents({}) == 0) | ||
|
||
# Record a "migration started" event. | ||
migrator = FakeMigrator() | ||
bk.record_migration_event( | ||
migrator=migrator, event=MigrationEvent.MIGRATION_STARTED | ||
) | ||
|
||
# Verify the migration event was recorded. | ||
self.assertTrue(collection.count_documents({}) == 1) | ||
doc = collection.find({})[0] | ||
self.assertIsInstance(doc["created_at"], str) | ||
self.assertIsInstance(doc["event"], str) | ||
self.assertEqual(doc["event"], MigrationEvent.MIGRATION_STARTED) | ||
self.assertIsInstance(doc["from_schema_version"], str) | ||
self.assertEqual(doc["from_schema_version"], migrator.get_origin_version()) | ||
self.assertIsInstance(doc["to_schema_version"], str) | ||
self.assertEqual(doc["to_schema_version"], migrator.get_destination_version()) | ||
self.assertIsInstance(doc["migrator_module"], str) | ||
|
||
# Verify the document in the view says the schema version is `null`. | ||
# Note: That's what I expect, since no "migration complete" events have been recorded yet. | ||
view = self.db.get_collection(bk.view_name) | ||
self.assertTrue(view.count_documents({}) == 1) | ||
view_doc = view.find({})[0] | ||
self.assertIsNone(view_doc["schema_version"]) | ||
|
||
# Record a "migration completed" event. | ||
bk.record_migration_event( | ||
migrator=migrator, event=MigrationEvent.MIGRATION_COMPLETED | ||
) | ||
|
||
# Verify the migration event was recorded. | ||
self.assertTrue(collection.count_documents({}) == 2) | ||
|
||
# Verify the document in the view says the schema version matches the one recorded. | ||
view = self.db.get_collection(bk.view_name) | ||
self.assertTrue(view.count_documents({}) == 1) | ||
view_doc = view.find({})[0] | ||
self.assertEqual(view_doc["schema_version"], migrator.get_destination_version()) | ||
|
||
# Finally, record another "migration started" event. | ||
bk.record_migration_event( | ||
migrator=migrator, event=MigrationEvent.MIGRATION_STARTED | ||
) | ||
|
||
# Confirm the document in the view once again says the schema version is `null`. | ||
self.assertTrue(view.count_documents({}) == 1) | ||
view_doc = view.find({})[0] | ||
self.assertIsNone(view_doc["schema_version"]) | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
Oops, something went wrong.