diff --git a/demo/metadata_migration/notebooks/.gitignore b/demo/metadata_migration/notebooks/.gitignore index 000100c5..20949554 100644 --- a/demo/metadata_migration/notebooks/.gitignore +++ b/demo/metadata_migration/notebooks/.gitignore @@ -1,5 +1,4 @@ /.notebook.env -/.mongo.origin.yaml -/.mongo.transformer.yaml /mongodump.*.out -/tmp.* \ No newline at end of file +/tmp.* +/*_migration.log \ No newline at end of file diff --git a/demo/metadata_migration/notebooks/.mongo.yaml.example b/demo/metadata_migration/notebooks/.mongo.yaml.example deleted file mode 100644 index a1df2e02..00000000 --- a/demo/metadata_migration/notebooks/.mongo.yaml.example +++ /dev/null @@ -1,29 +0,0 @@ -# MongoDB client configuration file for connecting to a MongoDB server. -# -# Instructions: -# -# 1. Update `uri` so it contains the MongoDB server connection string. -# -# Syntax: -# mongodb://{user}:{password}@{host}:{port}/?authSource={auth_database} -# -# Example: -# Assuming username is "root", password is "pass", host is "localhost", -# port is "27017", and name of authentication database is "admin", -# the value of `uri` would be: -# ``` -# mongodb://root:pass@localhost:27017/?authSource=admin -# ``` -# -# Example: -# Assuming the same scenario as in the previous example, but without -# access control enabled (i.e. no username/password), -# the value of `uri` would be: -# ``` -# mongodb://localhost:27017/ -# ``` -# -# Reference: -# https://www.mongodb.com/docs/database-tools/mongodump/#std-option-mongodump.--uri -# -uri: mongodb://user:pass@localhost:27017/?authSource=admin \ No newline at end of file diff --git a/demo/metadata_migration/notebooks/.notebook.env.example b/demo/metadata_migration/notebooks/.notebook.env.example index 25d3e266..187c7197 100644 --- a/demo/metadata_migration/notebooks/.notebook.env.example +++ b/demo/metadata_migration/notebooks/.notebook.env.example @@ -1,11 +1,20 @@ -# Paths to Mongo config files. 
-PATH_TO_ORIGIN_MONGO_CONFIG_FILE = "./.mongo.origin.yaml" -PATH_TO_TRANSFORMER_MONGO_CONFIG_FILE = "./.mongo.transformer.yaml" - # Paths to folders in which the notebook will store Mongo dumps. PATH_TO_ORIGIN_MONGO_DUMP_FOLDER = "./mongodump.origin.out" PATH_TO_TRANSFORMER_MONGO_DUMP_FOLDER = "./mongodump.transformer.out" -# These are absolute paths to the `mongodump` and `mongorestore` programs. -PATH_TO_MONGODUMP_BINARY = "__REPLACE_ME__" # e.g. "/Users/Alice/Downloads/mongodb-database-tools-macos-arm64-100.7.4/bin/mongodump" +# These are absolute paths to the `mongodump`, `mongorestore`, and `mongosh` programs. +PATH_TO_MONGODUMP_BINARY = "__REPLACE_ME__" # e.g. "/Users/Alice/Downloads/mongodb-database-tools-macos-arm64-100.7.4/bin/mongodump" PATH_TO_MONGORESTORE_BINARY = "__REPLACE_ME__" # e.g. "/Users/Alice/Downloads/mongodb-database-tools-macos-arm64-100.7.4/bin/mongorestore" +PATH_TO_MONGOSH_BINARY = "__REPLACE_ME__" # e.g. "/Users/Alice/Downloads/mongosh-1.10.6-darwin-x64/bin/mongosh" + +# Connection parameters for the Origin Mongo server (typically a remote server). +ORIGIN_MONGO_HOST="__REPLACE_ME__" +ORIGIN_MONGO_PORT="__REPLACE_ME__" +ORIGIN_MONGO_USERNAME="__REPLACE_ME__" +ORIGIN_MONGO_PASSWORD="__REPLACE_ME__" + +# Connection parameters for the Transformer Mongo server (typically a local server). 
+TRANSFORMER_MONGO_HOST="__REPLACE_ME__" +TRANSFORMER_MONGO_PORT="__REPLACE_ME__" +TRANSFORMER_MONGO_USERNAME="__REPLACE_ME__" +TRANSFORMER_MONGO_PASSWORD="__REPLACE_ME__" diff --git a/demo/metadata_migration/notebooks/helpers.py b/demo/metadata_migration/notebooks/helpers.py index d513af9f..4865ac3b 100644 --- a/demo/metadata_migration/notebooks/helpers.py +++ b/demo/metadata_migration/notebooks/helpers.py @@ -1,9 +1,13 @@ from pathlib import Path -import re -from typing import Dict +from typing import Dict, Optional, List +import logging +from datetime import datetime from dotenv import dotenv_values -import yaml +from linkml_runtime import SchemaView + + +DATABASE_CLASS_NAME = "Database" class Config: @@ -21,27 +25,9 @@ def parse_and_validate_notebook_config_file( # Parse the notebook config file. notebook_config = dotenv_values(notebook_config_file_path) - # Validate the Mongo config file paths. - origin_mongo_config_file_path = notebook_config[ - "PATH_TO_ORIGIN_MONGO_CONFIG_FILE" - ] - transformer_mongo_config_file_path = notebook_config[ - "PATH_TO_TRANSFORMER_MONGO_CONFIG_FILE" - ] - if not Path(origin_mongo_config_file_path).is_file(): - raise FileNotFoundError( - f"Origin Mongo config file not found at: {origin_mongo_config_file_path}" - ) - if not Path(transformer_mongo_config_file_path).is_file(): - raise FileNotFoundError( - f"Transformer Mongo config file not found at: {transformer_mongo_config_file_path}" - ) - # Validate the dump folder paths. origin_dump_folder_path = notebook_config["PATH_TO_ORIGIN_MONGO_DUMP_FOLDER"] - transformer_dump_folder_path = notebook_config[ - "PATH_TO_TRANSFORMER_MONGO_DUMP_FOLDER" - ] + transformer_dump_folder_path = notebook_config["PATH_TO_TRANSFORMER_MONGO_DUMP_FOLDER"] if not Path(origin_dump_folder_path).parent.is_dir(): raise FileNotFoundError( f"Parent folder of {origin_dump_folder_path} (origin Mongo dump folder path) not found." 
@@ -54,62 +40,107 @@ def parse_and_validate_notebook_config_file( # Validate the binary paths. mongodump_path = notebook_config["PATH_TO_MONGODUMP_BINARY"] mongorestore_path = notebook_config["PATH_TO_MONGORESTORE_BINARY"] + mongosh_path = notebook_config["PATH_TO_MONGOSH_BINARY"] if not Path(mongodump_path).is_file(): raise FileNotFoundError(f"mongodump binary not found at: {mongodump_path}") if not Path(mongorestore_path).is_file(): - raise FileNotFoundError( - f"mongorestore binary not found at: {mongorestore_path}" - ) + raise FileNotFoundError(f"mongorestore binary not found at: {mongorestore_path}") + if not Path(mongosh_path).is_file(): + raise FileNotFoundError(f"mongosh binary not found at: {mongosh_path}") + + origin_mongo_host = notebook_config["ORIGIN_MONGO_HOST"] + origin_mongo_port = notebook_config["ORIGIN_MONGO_PORT"] + origin_mongo_username = notebook_config["ORIGIN_MONGO_USERNAME"] + origin_mongo_password = notebook_config["ORIGIN_MONGO_PASSWORD"] + + transformer_mongo_host = notebook_config["TRANSFORMER_MONGO_HOST"] + transformer_mongo_port = notebook_config["TRANSFORMER_MONGO_PORT"] + transformer_mongo_username = notebook_config["TRANSFORMER_MONGO_USERNAME"] + transformer_mongo_password = notebook_config["TRANSFORMER_MONGO_PASSWORD"] return dict( - origin_mongo_config_file_path=origin_mongo_config_file_path, - transformer_mongo_config_file_path=transformer_mongo_config_file_path, origin_dump_folder_path=origin_dump_folder_path, transformer_dump_folder_path=transformer_dump_folder_path, mongodump_path=mongodump_path, mongorestore_path=mongorestore_path, + mongosh_path=mongosh_path, + origin_mongo_host=origin_mongo_host, + origin_mongo_port=origin_mongo_port, + origin_mongo_username=origin_mongo_username, + origin_mongo_password=origin_mongo_password, + transformer_mongo_host=transformer_mongo_host, + transformer_mongo_port=transformer_mongo_port, + transformer_mongo_username=transformer_mongo_username, + 
transformer_mongo_password=transformer_mongo_password, ) - def parse_and_validate_mongo_config_file( - self, mongo_config_file_path: str - ) -> Dict[str, str]: - # Parse the Mongo config files as YAML. - with open(mongo_config_file_path, "r") as file: - mongo_config = yaml.safe_load(file) - - # Validate the connection string. - uri = mongo_config["uri"] - if not re.match( - r"^mongodb:\/\/.*", uri - ): # note: this is a sanity test, not a comprehensive test - raise ValueError(f"uri value in {mongo_config_file_path} is invalid.") - - return dict(uri=uri) - def __init__(self, notebook_config_file_path: str = "./.notebook.env") -> None: # Parse and validate the notebook config file. - notebook_config = self.parse_and_validate_notebook_config_file( - notebook_config_file_path - ) + notebook_config = self.parse_and_validate_notebook_config_file(notebook_config_file_path) self.mongodump_path = notebook_config["mongodump_path"] self.mongorestore_path = notebook_config["mongorestore_path"] + self.mongosh_path = notebook_config["mongosh_path"] self.origin_dump_folder_path = notebook_config["origin_dump_folder_path"] - self.transformer_dump_folder_path = notebook_config[ - "transformer_dump_folder_path" - ] - - # Parse and validate the Mongo config files. - self.origin_mongo_config_file_path = notebook_config[ - "origin_mongo_config_file_path" - ] - self.transformer_mongo_config_file_path = notebook_config[ - "transformer_mongo_config_file_path" - ] - origin_mongo_server_config = self.parse_and_validate_mongo_config_file( - self.origin_mongo_config_file_path - ) - transformer_mongo_server_config = self.parse_and_validate_mongo_config_file( - self.transformer_mongo_config_file_path - ) - self.origin_mongo_server_uri = origin_mongo_server_config["uri"] - self.transformer_mongo_server_uri = transformer_mongo_server_config["uri"] + self.transformer_dump_folder_path = notebook_config["transformer_dump_folder_path"] + + # Parse the Mongo connection parameters. 
+ self.origin_mongo_host = notebook_config["origin_mongo_host"] + self.origin_mongo_port = notebook_config["origin_mongo_port"] + self.origin_mongo_username = notebook_config["origin_mongo_username"] + self.origin_mongo_password = notebook_config["origin_mongo_password"] + self.transformer_mongo_host = notebook_config["transformer_mongo_host"] + self.transformer_mongo_port = notebook_config["transformer_mongo_port"] + self.transformer_mongo_username = notebook_config["transformer_mongo_username"] + self.transformer_mongo_password = notebook_config["transformer_mongo_password"] + + +def setup_logger( + log_file_path: Optional[str] = None, + logger_name: str = "migrator_logger", + log_level: int = logging.DEBUG, +) -> logging.Logger: + r""" + Returns a logger that writes to a file at the specified log file path + (default: "./{YYYYMMDD_HHMM}_migration.log"). + """ + + # If no log file path was specified, generate one. + if log_file_path is None: + yyyymmdd_hhmm: str = datetime.now().strftime("%Y%m%d_%H%M") # YYYYMMDD_HHMM + log_file_path = f"./{yyyymmdd_hhmm}_migration.log" + + logger = logging.getLogger(name=logger_name) + logger.setLevel(level=log_level) + file_handler = logging.FileHandler(log_file_path) + formatter = logging.Formatter( + fmt="[%(asctime)s %(name)s %(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + file_handler.setFormatter(formatter) + if logger.hasHandlers(): + logger.handlers.clear() # avoids duplicate log entries + logger.addHandler(file_handler) + return logger + + +def get_collection_names_from_schema(schema_view: SchemaView) -> List[str]: + """ + Returns the names of the slots of the `Database` class that describe database collections. 
+ + :param schema_view: A `SchemaView` instance + """ + collection_names = [] + + for slot_name in schema_view.class_slots(DATABASE_CLASS_NAME): + slot_definition = schema_view.induced_slot(slot_name, DATABASE_CLASS_NAME) + + # Filter out any hypothetical (future) slots that don't correspond to a collection (e.g. `db_version`). + if slot_definition.multivalued and slot_definition.inlined_as_list: + collection_names.append(slot_name) + + # Filter out duplicate names. This is to work around the following issues in the schema: + # - https://github.com/microbiomedata/nmdc-schema/issues/1954 + # - https://github.com/microbiomedata/nmdc-schema/issues/1955 + collection_names = list(set(collection_names)) + + return collection_names diff --git a/demo/metadata_migration/notebooks/migrate_10_5_6_to_10_8_0.ipynb b/demo/metadata_migration/notebooks/migrate_10_5_6_to_10_8_0.ipynb new file mode 100644 index 00000000..ceb9b9a3 --- /dev/null +++ b/demo/metadata_migration/notebooks/migrate_10_5_6_to_10_8_0.ipynb @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": "# Migrate MongoDB database from `nmdc-schema` `v10.5.6` to `v10.8.0`", + "id": "d05efc6327778f9c" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "There are no migrators associated with any schema changes between schema versions `v10.5.6` and `v10.8.0`. So, this notebook is a \"no op\" (i.e. 
\"no operation\").", + "id": "b99d5924e825b9a2" + }, + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "jupyter": { + "is_executing": true + } + }, + "source": "# no op", + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/demo/metadata_migration/notebooks/migrate_10_8_0_to_11_0_0.ipynb b/demo/metadata_migration/notebooks/migrate_10_8_0_to_11_0_0.ipynb new file mode 100644 index 00000000..e0a45cdb --- /dev/null +++ b/demo/metadata_migration/notebooks/migrate_10_8_0_to_11_0_0.ipynb @@ -0,0 +1,758 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "source": "# Migrate MongoDB database from `nmdc-schema` `v10.8.0` to `v11.0.0`" + }, + { + "cell_type": "markdown", + "id": "3c31d85d", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "This notebook will be used to migrate the database from `nmdc-schema` `v10.8.0` ([released](https://github.com/microbiomedata/nmdc-schema/releases/tag/v10.8.0) August 21, 2024) to `v11.0.0` (i.e. the initial version of the so-called \"Berkeley schema\").\n", + "\n", + "Unlike previous migrators, this one does not pick and choose which collections it will dump. There are two reasons for this: (1) migrators no longer have a dedicated `self.agenda` dictionary that indicates all the collections involved in the migration; and (2) this migration is the first one that involves creating, renaming, and dropping any collections; none of which are things that the old `self.agenda`-based system was designed to handle. 
So, instead of picking and choosing collections, this migrator **dumps them all.**" + ] + }, + { + "cell_type": "markdown", + "id": "f65ad4ab", + "metadata": {}, + "source": [ + "## Prerequisites" + ] + }, + { + "cell_type": "markdown", + "id": "17f351e8", + "metadata": {}, + "source": [ + "### 1. Coordinate with stakeholders.\n", + "\n", + "We will be enacting full Runtime and Database downtime for this migration. Ensure stakeholders are aware of that." + ] + }, + { + "cell_type": "markdown", + "id": "233a35c3", + "metadata": {}, + "source": [ + "### 2. Set up notebook environment.\n", + "\n", + "Here, you'll prepare an environment for running this notebook.\n", + "\n", + "1. Start a **MongoDB server** on your local machine (and ensure it does **not** already contain a database named `nmdc`).\n", + " 1. You can start a [Docker](https://hub.docker.com/_/mongo)-based MongoDB server at `localhost:27055` by running this command. A MongoDB server started this way will be accessible without a username or password.\n" + ] + }, + { + "cell_type": "code", + "id": "8aee55e3", + "metadata": {}, + "source": [ + "!docker run --rm --detach --name mongo-migration-transformer -p 27055:27017 mongo:6.0.4" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "22f5c78f", + "metadata": {}, + "source": [ + "2. Delete **obsolete dumps** from previous notebook runs.\n", + " 1. This is so the dumps you generate below will not be merged with any unrelated ones." + ] + }, + { + "cell_type": "code", + "id": "c70b6715", + "metadata": {}, + "source": [ + "!rm -rf {cfg.origin_dump_folder_path}\n", + "!rm -rf {cfg.transformer_dump_folder_path}" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "6cd05ccb", + "metadata": {}, + "source": [ + "3. Create and populate a **notebook configuration file** named `.notebook.env`.\n", + " 1. You can use `.notebook.env.example` as a template.\n", + " 2. 
The \"origin\" MongoDB server is the one that contains the database you want to migrate.\n", + " 3. The \"transformer\" MongoDB server is the one you want to use to perform the data transformations." + ] + }, + { + "cell_type": "markdown", + "id": "69937b18", + "metadata": {}, + "source": [ + "## Procedure" + ] + }, + { + "cell_type": "markdown", + "id": "fe81196a", + "metadata": {}, + "source": [ + "### Install Python packages\n", + "\n", + "In this step, you'll [install](https://saturncloud.io/blog/what-is-the-difference-between-and-in-jupyter-notebooks/) the Python packages upon which this notebook depends.\n", + "\n", + "> Note: If the output of this cell says \"Note: you may need to restart the kernel to use updated packages\", restart the kernel (not the notebook cells), then proceed to the next cell.\n", + "\n", + "##### References\n", + "\n", + "| Description | Link |\n", + "|---------------------------------------------------------------------------------|--------------------------------------------------------|\n", + "| Berkeley Schema PyPI package
(it's version 11+ of the `nmdc-schema` package) | https://pypi.org/project/nmdc-schema |\n", + "| Berkeley Schema GitHub repository | https://github.com/microbiomedata/berkeley-schema-fy24 |\n", + "| How to `pip install` from a Git branch
instead of PyPI | https://stackoverflow.com/a/20101940 |" + ] + }, + { + "cell_type": "code", + "id": "e25a0af308c3185b", + "metadata": { + "collapsed": false + }, + "source": [ + "%pip install --upgrade pip\n", + "%pip install -r requirements.txt\n", + "%pip install nmdc-schema==11.0.0rc20" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "a407c354", + "metadata": {}, + "source": [ + "### Import Python dependencies\n", + "\n", + "Import the Python objects upon which this notebook depends.\n", + "\n", + "- TODO: Consider whether the JSON Schema Validator version (e.g. `Draft7Validator` versus `Draft201909Validator`) is consistent with the JSON Schema version." + ] + }, + { + "cell_type": "code", + "id": "dbecd561", + "metadata": {}, + "source": [ + "# Standard library packages:\n", + "import subprocess\n", + "from typing import List\n", + "\n", + "# Third-party packages:\n", + "import pymongo\n", + "from jsonschema import Draft7Validator as JSONSchemaValidator\n", + "from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict, SchemaVariantIdentifier, get_nmdc_jsonschema\n", + "from nmdc_schema.migrators.adapters.mongo_adapter import MongoAdapter\n", + "from linkml_runtime import SchemaView\n", + "\n", + "# Note: The migrator module has \"10_2_0\" in its name because, when it was created,\n", + "# the latest legacy schema version was, indeed, still `10.2.0`.\n", + "from nmdc_schema.migrators.migrator_from_10_2_0_to_11_0_0 import Migrator\n", + "\n", + "# First-party packages:\n", + "from helpers import Config, setup_logger, get_collection_names_from_schema\n", + "from bookkeeper import Bookkeeper, MigrationEvent" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "99b20ff4", + "metadata": {}, + "source": [ + "### Parse configuration files\n", + "\n", + "Parse the notebook and Mongo configuration files." 
+ ] + }, + { + "cell_type": "code", + "id": "1eac645a", + "metadata": {}, + "source": [ + "cfg = Config()\n", + "\n", + "# Define some aliases we can use to make the shell commands in this notebook easier to read.\n", + "mongodump = cfg.mongodump_path\n", + "mongorestore = cfg.mongorestore_path\n", + "mongosh = cfg.mongosh_path\n", + "\n", + "# Perform a sanity test of the application paths.\n", + "!{mongodump} --version\n", + "!{mongorestore} --version\n", + "!{mongosh} --version" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "68245d2b", + "metadata": {}, + "source": [ + "### Create MongoDB clients\n", + "\n", + "Create MongoDB clients you can use to access the \"origin\" and \"transformer\" MongoDB servers." + ] + }, + { + "cell_type": "code", + "id": "8e95f559", + "metadata": {}, + "source": [ + "# Mongo client for \"origin\" MongoDB server.\n", + "origin_mongo_client = pymongo.MongoClient(host=cfg.origin_mongo_host, \n", + " port=cfg.origin_mongo_port,\n", + " username=cfg.origin_mongo_username,\n", + " password=cfg.origin_mongo_password,\n", + " directConnection=True)\n", + "\n", + "# Mongo client for \"transformer\" MongoDB server.\n", + "transformer_mongo_client = pymongo.MongoClient(host=cfg.transformer_mongo_host, \n", + " port=cfg.transformer_mongo_port,\n", + " username=cfg.transformer_mongo_username,\n", + " password=cfg.transformer_mongo_password,\n", + " directConnection=True)\n", + "\n", + "# Perform sanity tests of those MongoDB clients' abilities to access their respective MongoDB servers.\n", + "with pymongo.timeout(3):\n", + " # Display the MongoDB server version (running on the \"origin\" Mongo server).\n", + " print(\"Origin Mongo server version: \" + origin_mongo_client.server_info()[\"version\"])\n", + "\n", + " # Sanity test: Ensure the origin database exists.\n", + " assert \"nmdc\" in origin_mongo_client.list_database_names(), \"Origin database does not exist.\"\n", + "\n", + " # Display the 
MongoDB server version (running on the \"transformer\" Mongo server).\n", + " print(\"Transformer Mongo server version: \" + transformer_mongo_client.server_info()[\"version\"])\n", + "\n", + " # Sanity test: Ensure the transformation database does not exist.\n", + " assert \"nmdc\" not in transformer_mongo_client.list_database_names(), \"Transformation database already exists.\"" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "1e195db1", + "metadata": {}, + "source": [ + "Delete the \"nmdc\" database from the transformer MongoDB server if that database already exists there (e.g. if it was left over from an experiment).\n", + "\n", + "##### Description\n", + "\n", + "| Description | Link |\n", + "|------------------------------|---------------------------------------------------------------|\n", + "| Python's `subprocess` module | https://docs.python.org/3/library/subprocess.html |\n", + "| `mongosh` CLI options | https://www.mongodb.com/docs/mongodb-shell/reference/options/ |" + ] + }, + { + "cell_type": "code", + "id": "8939a2ed", + "metadata": {}, + "source": [ + "# Note: I run this command via Python's `subprocess` module instead of via an IPython magic `!` command\n", + "# because I expect to eventually use regular Python scripts—not Python notebooks—for migrations.\n", + "shell_command = f\"\"\"\n", + " {cfg.mongosh_path} \\\n", + " --host='{cfg.transformer_mongo_host}' \\\n", + " --port='{cfg.transformer_mongo_port}' \\\n", + " --username='{cfg.transformer_mongo_username}' \\\n", + " --password='{cfg.transformer_mongo_password}' \\\n", + " --quiet \\\n", + " --eval 'use nmdc' \\\n", + " --eval 'db.dropDatabase()'\n", + "\"\"\"\n", + "completed_process = subprocess.run(shell_command, shell=True)\n", + "print(f\"\\nReturn code: {completed_process.returncode}\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "bc387abc62686091", + "metadata": { + "collapsed": false + }, + 
"source": [ + "### Create JSON Schema validator\n", + "\n", + "In this step, you'll create a JSON Schema validator for the NMDC Schema." + ] + }, + { + "cell_type": "code", + "id": "5c982eb0c04e606d", + "metadata": { + "collapsed": false + }, + "source": [ + "nmdc_jsonschema: dict = get_nmdc_jsonschema_dict(variant=SchemaVariantIdentifier.nmdc_materialized_patterns)\n", + "nmdc_jsonschema_validator = JSONSchemaValidator(nmdc_jsonschema)\n", + "\n", + "# Perform sanity tests of the NMDC Schema dictionary and the JSON Schema validator.\n", + "# Reference: https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/protocols/#jsonschema.protocols.Validator.check_schema\n", + "print(\"NMDC Schema title: \" + nmdc_jsonschema[\"title\"])\n", + "print(\"NMDC Schema version: \" + nmdc_jsonschema[\"version\"])\n", + "\n", + "nmdc_jsonschema_validator.check_schema(nmdc_jsonschema) # raises exception if schema is invalid" + ], + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Create SchemaView\n", + "\n", + "In this step, you'll instantiate a `SchemaView` that is bound to the destination schema. \n", + "\n", + "- Reference: https://linkml.io/linkml/developers/schemaview.html" + ], + "id": "e7e8befb362a1670" + }, + { + "metadata": {}, + "cell_type": "code", + "source": "schema_view = SchemaView(get_nmdc_jsonschema())", + "id": "625a6e7df5016677", + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "3975ac24", + "metadata": {}, + "source": [ + "### Revoke access from the \"origin\" MongoDB server\n", + "\n", + "We revoke both \"write\" and \"read\" access to the server.\n", + "\n", + "We revoke \"write\" access so people don't make changes to the original data while the migration is happening, given that the migration ends with an overwriting of the original data (which would wipe out any changes made in the meantime).\n", + "\n", + "We also revoke \"read\" access. 
The revocation of \"read\" access is technically optional, but (a) the JavaScript mongosh script will be easier for me to maintain if it revokes everything and (b) this prevents people from reading data during the restore step, during which the database may not be self-consistent.\n", + "\n", + "##### References\n", + "\n", + "| Description | Link |\n", + "|--------------------------------|-----------------------------------------------------------|\n", + "| Running a script via `mongosh` | https://www.mongodb.com/docs/mongodb-shell/write-scripts/ |" + ] + }, + { + "cell_type": "code", + "id": "f761caad", + "metadata": {}, + "source": [ + "shell_command = f\"\"\"\n", + " {cfg.mongosh_path} \\\n", + " --host='{cfg.origin_mongo_host}' \\\n", + " --port='{cfg.origin_mongo_port}' \\\n", + " --username='{cfg.origin_mongo_username}' \\\n", + " --password='{cfg.origin_mongo_password}' \\\n", + " --quiet \\\n", + " --file='mongosh-scripts/revoke-privileges.mongo.js'\n", + "\"\"\"\n", + "completed_process = subprocess.run(shell_command, shell=True)\n", + "print(f\"\\nReturn code: {completed_process.returncode}\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "b7799910b6b0715d", + "metadata": {}, + "source": [ + "### Dump collections from the \"origin\" MongoDB server\n", + "\n", + "Use `mongodump` to dump all the collections **from** the \"origin\" MongoDB server **into** a local directory.\n", + "\n", + "- TODO: Consider only dumping collections represented by the initial schema." 
+ ] + }, + { + "cell_type": "code", + "id": "da530d6754c4f6fe", + "metadata": {}, + "source": [ + "# Dump all collections from the \"origin\" database.\n", + "shell_command = f\"\"\"\n", + " {mongodump} \\\n", + " --host='{cfg.origin_mongo_host}' \\\n", + " --port='{cfg.origin_mongo_port}' \\\n", + " --username='{cfg.origin_mongo_username}' \\\n", + " --password='{cfg.origin_mongo_password}' \\\n", + " --authenticationDatabase='admin' \\\n", + " --db='nmdc' \\\n", + " --gzip \\\n", + " --out='{cfg.origin_dump_folder_path}'\n", + "\"\"\"\n", + "completed_process = subprocess.run(shell_command, shell=True)\n", + "print(f\"\\nReturn code: {completed_process.returncode}\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "932ebde8abdd70ec", + "metadata": {}, + "source": [ + "### Load the dumped collections into the \"transformer\" MongoDB server\n", + "\n", + "Use `mongorestore` to load the dumped collections **from** the local directory **into** the \"transformer\" MongoDB server." 
+ ] + }, + { + "cell_type": "code", + "id": "79bd888e82d52a93", + "metadata": {}, + "source": [ + "# Restore the dumped collections to the \"transformer\" MongoDB server.\n", + "shell_command = f\"\"\"\n", + " {mongorestore} \\\n", + " --host='{cfg.transformer_mongo_host}' \\\n", + " --port='{cfg.transformer_mongo_port}' \\\n", + " --username='{cfg.transformer_mongo_username}' \\\n", + " --password='{cfg.transformer_mongo_password}' \\\n", + " --authenticationDatabase='admin' \\\n", + " --gzip \\\n", + " --drop \\\n", + " --preserveUUID \\\n", + " --stopOnError \\\n", + " --dir='{cfg.origin_dump_folder_path}'\n", + "\"\"\"\n", + "completed_process = subprocess.run(shell_command, shell=True)\n", + "print(f\"\\nReturn code: {completed_process.returncode}\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "c3e3c9c4", + "metadata": {}, + "source": [ + "### Transform the collections within the \"transformer\" MongoDB server\n", + "\n", + "Use the migrator to transform the collections in the \"transformer\" database.\n", + "\n", + "> Reminder: The database transformation functions are defined in the `nmdc-schema` Python package installed earlier.\n", + "\n", + "> Reminder: The \"origin\" database is **not** affected by this step." 
+ ] + }, + { + "cell_type": "code", + "id": "9c89c9dd3afe64e2", + "metadata": {}, + "source": [ + "# Instantiate a MongoAdapter bound to the \"transformer\" database.\n", + "adapter = MongoAdapter(\n", + " database=transformer_mongo_client[\"nmdc\"],\n", + " on_collection_created=lambda name: print(f'Created collection \"{name}\"'),\n", + " on_collection_renamed=lambda old_name, name: print(f'Renamed collection \"{old_name}\" to \"{name}\"'),\n", + " on_collection_deleted=lambda name: print(f'Deleted collection \"{name}\"'),\n", + ")\n", + "\n", + "# Instantiate a Migrator bound to that adapter.\n", + "logger = setup_logger()\n", + "migrator = Migrator(adapter=adapter, logger=logger)\n", + "\n", + "# Execute the Migrator's `upgrade` method to perform the migration.\n", + "migrator.upgrade()" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "4c090068", + "metadata": {}, + "source": [ + "### Validate the transformed documents\n", + "\n", + "Now that we have transformed the database, validate each document in each collection in the \"transformer\" MongoDB server." + ] + }, + { + "cell_type": "code", + "id": "e1c50b9911e02e70", + "metadata": {}, + "source": [ + "# Get the names of all collections.\n", + "collection_names: List[str] = get_collection_names_from_schema(schema_view)\n", + "\n", + "# Ensure that, if the (large) \"functional_annotation_agg\" collection is present in `collection_names`,\n", + "# it goes at the end of the list we process. 
That way, we can find out about validation errors in\n", + "# other collections without having to wait for that (large) collection to be validated.\n", + "ordered_collection_names = sorted(collection_names.copy())\n", + "large_collection_name = \"functional_annotation_agg\"\n", + "if large_collection_name in ordered_collection_names:\n", + " ordered_collection_names = list(filter(lambda n: n != large_collection_name, ordered_collection_names))\n", + " ordered_collection_names.append(large_collection_name) # puts it last\n", + "\n", + "for collection_name in ordered_collection_names:\n", + " collection = transformer_mongo_client[\"nmdc\"][collection_name]\n", + " num_documents_in_collection = collection.count_documents({})\n", + " print(f\"Validating collection {collection_name} ({num_documents_in_collection} documents)\")\n", + "\n", + " for document in collection.find():\n", + " # Validate the transformed document.\n", + " #\n", + " # Reference: https://github.com/microbiomedata/nmdc-schema/blob/main/src/docs/schema-validation.md\n", + " #\n", + " # Note: Dictionaries originating as Mongo documents include a Mongo-generated key named `_id`. However,\n", + " # the NMDC Schema does not describe that key and, indeed, data validators consider dictionaries\n", + " # containing that key to be invalid with respect to the NMDC Schema. So, here, we validate a\n", + " # copy (i.e. 
a shallow copy) of the document that lacks that specific key.\n", + " #\n", + " # Note: `root_to_validate` is a dictionary having the shape: { \"some_collection_name\": [ some_document ] }\n", + " # Reference: https://docs.python.org/3/library/stdtypes.html#dict (see the \"type constructor\" section)\n", + " #\n", + " document_without_underscore_id_key = {key: value for key, value in document.items() if key != \"_id\"}\n", + " root_to_validate = dict([(collection_name, [document_without_underscore_id_key])])\n", + " nmdc_jsonschema_validator.validate(root_to_validate) # raises exception if invalid" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "3edf77c7", + "metadata": {}, + "source": [ + "### Dump the collections from the \"transformer\" MongoDB server\n", + "\n", + "Now that the collections have been transformed and validated, dump them **from** the \"transformer\" MongoDB server **into** a local directory." + ] + }, + { + "cell_type": "code", + "id": "db6e432d", + "metadata": {}, + "source": [ + "# Dump the database from the \"transformer\" MongoDB server.\n", + "shell_command = f\"\"\"\n", + " {mongodump} \\\n", + " --host='{cfg.transformer_mongo_host}' \\\n", + " --port='{cfg.transformer_mongo_port}' \\\n", + " --username='{cfg.transformer_mongo_username}' \\\n", + " --password='{cfg.transformer_mongo_password}' \\\n", + " --authenticationDatabase='admin' \\\n", + " --db='nmdc' \\\n", + " --gzip \\\n", + " --out='{cfg.transformer_dump_folder_path}'\n", + "\"\"\"\n", + "completed_process = subprocess.run(shell_command, shell=True)\n", + "print(f\"\\nReturn code: {completed_process.returncode}\") " + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "997fcb281d9d3222", + "metadata": { + "collapsed": false + }, + "source": [ + "### Create a bookkeeper\n", + "\n", + "Create a `Bookkeeper` that can be used to document migration events in the \"origin\" server." 
+ ] + }, + { + "cell_type": "code", + "id": "dbbe706d", + "metadata": {}, + "source": [ + "bookkeeper = Bookkeeper(mongo_client=origin_mongo_client)" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "1e0c8891", + "metadata": {}, + "source": [ + "### Indicate — on the \"origin\" server — that the migration is underway\n", + "\n", + "Add an entry to the migration log collection to indicate that this migration has started." + ] + }, + { + "cell_type": "code", + "id": "ca49f61a", + "metadata": {}, + "source": [ + "bookkeeper.record_migration_event(migrator=migrator, event=MigrationEvent.MIGRATION_STARTED)" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "9c253e6f", + "metadata": {}, + "source": [ + "### Drop the original collections from the \"origin\" MongoDB server\n", + "\n", + "This is necessary for situations where collections were renamed or deleted. (The `--drop` option of `mongorestore` only drops collections that exist in the dump.)" + ] + }, + { + "cell_type": "code", + "id": "0b26e434", + "metadata": {}, + "source": [ + "shell_command = f\"\"\"\n", + " {cfg.mongosh_path} \\\n", + " --host='{cfg.origin_mongo_host}' \\\n", + " --port='{cfg.origin_mongo_port}' \\\n", + " --username='{cfg.origin_mongo_username}' \\\n", + " --password='{cfg.origin_mongo_password}' \\\n", + " --eval 'use nmdc' \\\n", + " --eval 'db.dropDatabase()'\n", + "\"\"\"\n", + "completed_process = subprocess.run(shell_command, shell=True)\n", + "print(f\"\\nReturn code: {completed_process.returncode}\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "d84bdc11", + "metadata": {}, + "source": [ + "### Load the collections into the \"origin\" MongoDB server\n", + "\n", + "Load the transformed collections into the \"origin\" MongoDB server." 
+ ] + }, + { + "cell_type": "code", + "id": "1dfbcf0a", + "metadata": {}, + "source": [ + "# Load the transformed collections into the origin server, replacing any same-named ones that are there.\n", + "shell_command = f\"\"\"\n", + " {mongorestore} \\\n", + " --host='{cfg.origin_mongo_host}' \\\n", + " --port='{cfg.origin_mongo_port}' \\\n", + " --username='{cfg.origin_mongo_username}' \\\n", + " --password='{cfg.origin_mongo_password}' \\\n", + " --authenticationDatabase='admin' \\\n", + " --gzip \\\n", + " --verbose \\\n", + " --dir='{cfg.transformer_dump_folder_path}' \\\n", + " --drop \\\n", + " --preserveUUID \\\n", + " --stopOnError\n", + "\"\"\"\n", + "completed_process = subprocess.run(shell_command, shell=True)\n", + "print(f\"\\nReturn code: {completed_process.returncode}\") " + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "ca5ee89a79148499", + "metadata": { + "collapsed": false + }, + "source": [ + "### Indicate that the migration is complete\n", + "\n", + "Add an entry to the migration log collection to indicate that this migration is complete." + ] + }, + { + "cell_type": "code", + "id": "d1eaa6c92789c4f3", + "metadata": { + "collapsed": false + }, + "source": [ + "bookkeeper.record_migration_event(migrator=migrator, event=MigrationEvent.MIGRATION_COMPLETED)" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "04c856a8", + "metadata": {}, + "source": [ + "### Restore access to the \"origin\" MongoDB server\n", + "\n", + "This effectively undoes the access revocation that we did earlier."
+ ] + }, + { + "cell_type": "code", + "id": "9aab3c7e", + "metadata": {}, + "source": [ + "shell_command = f\"\"\"\n", + " {cfg.mongosh_path} \\\n", + " --host='{cfg.origin_mongo_host}' \\\n", + " --port='{cfg.origin_mongo_port}' \\\n", + " --username='{cfg.origin_mongo_username}' \\\n", + " --password='{cfg.origin_mongo_password}' \\\n", + " --quiet \\\n", + " --file='mongosh-scripts/restore-privileges.mongo.js'\n", + "\"\"\"\n", + "completed_process = subprocess.run(shell_command, shell=True)\n", + "print(f\"\\nReturn code: {completed_process.returncode}\")" + ], + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/demo/metadata_migration/notebooks/mongosh-scripts/restore-privileges.mongo.js b/demo/metadata_migration/notebooks/mongosh-scripts/restore-privileges.mongo.js new file mode 100644 index 00000000..b9f3a9f9 --- /dev/null +++ b/demo/metadata_migration/notebooks/mongosh-scripts/restore-privileges.mongo.js @@ -0,0 +1,75 @@ +/** + * This mongosh script restores all standard NMDC user-defined Mongo roles + * (except for the "nmdc_migrator" role) to their standard states. + * + * Note: This script contains excerpts from the authoritative user-defined role reference script at: + * https://github.com/microbiomedata/infra-admin/blob/main/mongodb/roles/createRoles.mongo.js + * You can compare this file to that one, using: https://www.diffchecker.com/text-compare/ + * + * Note: I select the database via `db.getSiblingDB()` since the `use` helper isn't available here. 
+ * Reference: https://www.mongodb.com/docs/manual/reference/method/db.getSiblingDB/ + */ + +const db = db.getSiblingDB("admin"); + +db.updateRole("nmdc_runtime", { + privileges: [], + roles: [ + { db: "admin", role: "readWriteAnyDatabase" }, + { db: "admin", role: "dbAdminAnyDatabase" }, + ], +}); + +db.updateRole("nmdc_scheduler", { + privileges: [ + { resource: { db: "nmdc", collection: "jobs" }, actions: ["find", "insert", "update", "remove"] } + ], + roles: [ + { db: "nmdc", role: "read" }, + ], +}); + +db.updateRole("nmdc_aggregator", { + privileges: [ + { resource: { db: "nmdc", collection: "metap_gene_function_aggregation" }, actions: ["find", "insert", "update", "remove"] }, + { resource: { db: "nmdc", collection: "functional_annotation_agg" }, actions: ["find", "insert", "update", "remove"] }, + ], + roles: [ + { db: "nmdc", role: "read" }, + ], +}); + +db.updateRole("nmdc_reader", { + privileges: [ + { resource: { db: "", collection: "" }, actions: ["changeOwnPassword"] }, + ], + roles: [ + { db: "nmdc", role: "read" }, + { db: "nmdc_updated", role: "read" }, + { db: "nmdc_deleted", role: "read" }, + { db: "nmdc_changesheet_submission_results", role: "read" }, + ], +}); + +db.updateRole("nmdc_editor", { + privileges: [ + { resource: { db: "", collection: "" }, actions: ["changeOwnPassword"] }, + ], + roles: [ + { db: "nmdc", role: "readWrite" }, + { db: "nmdc_updated", role: "readWrite" }, + { db: "nmdc_deleted", role: "readWrite" }, + { db: "nmdc_changesheet_submission_results", role: "readWrite" }, + ], +}); + +db.updateRole("all_dumper", { + privileges: [ + { resource: { db: "config", collection: "system.preimages" }, actions: ["find"] }, + ], + roles: [ + { db: "admin", role: "backup" }, + ], +}); + +print("✅ Access restored."); \ No newline at end of file diff --git a/demo/metadata_migration/notebooks/mongosh-scripts/revoke-privileges.mongo.js b/demo/metadata_migration/notebooks/mongosh-scripts/revoke-privileges.mongo.js new file mode 100644 index 
00000000..588ec21e --- /dev/null +++ b/demo/metadata_migration/notebooks/mongosh-scripts/revoke-privileges.mongo.js @@ -0,0 +1,19 @@ +/** + * This mongosh script revokes all privileges from all standard NMDC user-defined Mongo roles + * (except for the "nmdc_migrator" role). + * + * Note: I select the database via `db.getSiblingDB()` since the `use` helper isn't available here. + * Reference: https://www.mongodb.com/docs/manual/reference/method/db.getSiblingDB/ + */ + + +const db = db.getSiblingDB("admin"); + +db.updateRole("nmdc_reader", { privileges: [], roles: [] }); +db.updateRole("nmdc_editor", { privileges: [], roles: [] }); +db.updateRole("nmdc_runtime", { privileges: [], roles: [] }); +db.updateRole("nmdc_aggregator", { privileges: [], roles: [] }); +db.updateRole("nmdc_scheduler", { privileges: [], roles: [] }); +db.updateRole("all_dumper", { privileges: [], roles: [] }); + +print("✋ Access revoked."); \ No newline at end of file diff --git a/demo/metadata_migration/notebooks/requirements.txt b/demo/metadata_migration/notebooks/requirements.txt index 0096244c..63794fcc 100644 --- a/demo/metadata_migration/notebooks/requirements.txt +++ b/demo/metadata_migration/notebooks/requirements.txt @@ -2,4 +2,5 @@ dictdiffer==0.9.0 jsonschema==4.19.2 pymongo==4.7.2 python-dotenv==1.0.0 -PyYAML==6.0.1 \ No newline at end of file +PyYAML==6.0.1 +linkml-runtime==1.8.2 \ No newline at end of file diff --git a/demo/metadata_migration/notebooks/test_helpers.py b/demo/metadata_migration/notebooks/test_helpers.py index 761a6f08..f1aa2317 100644 --- a/demo/metadata_migration/notebooks/test_helpers.py +++ b/demo/metadata_migration/notebooks/test_helpers.py @@ -19,7 +19,13 @@ class TestConfig(unittest.TestCase): """ def test_init_method(self): - with TempFile() as notebook_config_file, TempFile() as origin_mongo_config_file, TempFile() as transformer_mongo_config_file, TempFile() as mongodump_binary, TempFile() as mongorestore_binary: + with (TempFile() as notebook_config_file, 
+ TempFile() as origin_mongo_config_file, + TempFile() as transformer_mongo_config_file, + TempFile() as mongodump_binary, + TempFile() as mongorestore_binary, + TempFile() as mongosh_binary): + # Create named temporary directories and get their paths. origin_dump_folder_path = mkdtemp() transformer_dump_folder_path = mkdtemp() @@ -27,6 +33,14 @@ def test_init_method(self): # Populate the Mongo config files, then reset their file pointers. origin_mongo_server_uri = f"mongodb://u:p@origin:12345" transformer_mongo_server_uri = f"mongodb://u:p@transformer:12345" + origin_mongo_host = "origin" + origin_mongo_port = "11111" + origin_mongo_username = "origin_username" + origin_mongo_password = "origin_password" + transformer_mongo_host = "transformer" + transformer_mongo_port = "22222" + transformer_mongo_username = "transformer_username" + transformer_mongo_password = "transformer_password" origin_mongo_yaml = f"uri: {origin_mongo_server_uri}\n" transformer_mongo_yaml = f"uri: {transformer_mongo_server_uri}\n" origin_mongo_config_file.write(origin_mongo_yaml.encode("utf-8")) @@ -37,17 +51,25 @@ def test_init_method(self): # Use familiar aliases in an attempt to facilitate writing the `assert` section below. mongodump_path = mongodump_binary.name mongorestore_path = mongorestore_binary.name + mongosh_path = mongosh_binary.name origin_mongo_config_file_path = origin_mongo_config_file.name transformer_mongo_config_file_path = transformer_mongo_config_file.name # Populate the notebook config file, then reset its file pointer. 
notebook_config_values = dict( - PATH_TO_ORIGIN_MONGO_CONFIG_FILE=origin_mongo_config_file_path, - PATH_TO_TRANSFORMER_MONGO_CONFIG_FILE=transformer_mongo_config_file_path, PATH_TO_ORIGIN_MONGO_DUMP_FOLDER=origin_dump_folder_path, PATH_TO_TRANSFORMER_MONGO_DUMP_FOLDER=transformer_dump_folder_path, PATH_TO_MONGODUMP_BINARY=mongodump_path, PATH_TO_MONGORESTORE_BINARY=mongorestore_path, + PATH_TO_MONGOSH_BINARY=mongosh_path, + ORIGIN_MONGO_HOST=origin_mongo_host, + ORIGIN_MONGO_PORT=origin_mongo_port, + ORIGIN_MONGO_USERNAME=origin_mongo_username, + ORIGIN_MONGO_PASSWORD=origin_mongo_password, + TRANSFORMER_MONGO_HOST=transformer_mongo_host, + TRANSFORMER_MONGO_PORT=transformer_mongo_port, + TRANSFORMER_MONGO_USERNAME=transformer_mongo_username, + TRANSFORMER_MONGO_PASSWORD=transformer_mongo_password, ) for key, value in notebook_config_values.items(): notebook_config_file.write(f"{key} = {value}\n".encode("utf-8")) @@ -61,13 +83,14 @@ def test_init_method(self): assert cfg.mongorestore_path == mongorestore_path assert cfg.origin_dump_folder_path == origin_dump_folder_path assert cfg.transformer_dump_folder_path == transformer_dump_folder_path - assert cfg.origin_mongo_config_file_path == origin_mongo_config_file_path - assert ( - cfg.transformer_mongo_config_file_path - == transformer_mongo_config_file_path - ) - assert cfg.origin_mongo_server_uri == origin_mongo_server_uri - assert cfg.transformer_mongo_server_uri == transformer_mongo_server_uri + assert cfg.origin_mongo_host == origin_mongo_host + assert cfg.origin_mongo_port == origin_mongo_port + assert cfg.origin_mongo_username == origin_mongo_username + assert cfg.origin_mongo_password == origin_mongo_password + assert cfg.transformer_mongo_host == transformer_mongo_host + assert cfg.transformer_mongo_port == transformer_mongo_port + assert cfg.transformer_mongo_username == transformer_mongo_username + assert cfg.transformer_mongo_password == transformer_mongo_password # Delete the temporary directories (i.e. 
clean up). shutil.rmtree(origin_dump_folder_path)