diff --git a/alchemiscale/compute/service.py b/alchemiscale/compute/service.py index e4f15ce5..50897ce1 100644 --- a/alchemiscale/compute/service.py +++ b/alchemiscale/compute/service.py @@ -251,9 +251,14 @@ def claim_tasks(self, count=1) -> List[Optional[ScopedKey]]: # claim tasks from taskhubs based on weight; keep going till we hit our # total desired task count, or we run out of taskhubs to draw from while len(tasks) < count and len(taskhubs) > 0: + weights = [th.weight for th in taskhubs.values()] + + if sum(weights) == 0: + break + # based on weights, choose taskhub to draw from taskhub: List[ScopedKey] = random.choices( - list(taskhubs.keys()), weights=[th.weight for th in taskhubs.values()] + list(taskhubs.keys()), weights=weights )[0] # claim tasks from the taskhub diff --git a/devtools/conda-envs/alchemiscale-client.yml b/devtools/conda-envs/alchemiscale-client.yml index 09b47062..f2ed56e1 100644 --- a/devtools/conda-envs/alchemiscale-client.yml +++ b/devtools/conda-envs/alchemiscale-client.yml @@ -31,5 +31,5 @@ dependencies: - pip: - nest_asyncio - async_lru - - git+https://github.com/openforcefield/alchemiscale.git@v0.3.0 + - git+https://github.com/openforcefield/alchemiscale.git@v0.4.0 - git+https://github.com/choderalab/perses.git@protocol-neqcyc diff --git a/devtools/conda-envs/alchemiscale-compute.yml b/devtools/conda-envs/alchemiscale-compute.yml index dd55d74c..2e033e00 100644 --- a/devtools/conda-envs/alchemiscale-compute.yml +++ b/devtools/conda-envs/alchemiscale-compute.yml @@ -27,5 +27,5 @@ dependencies: - pip: - async_lru - - git+https://github.com/openforcefield/alchemiscale.git@v0.3.0 + - git+https://github.com/openforcefield/alchemiscale.git@v0.4.0 - git+https://github.com/choderalab/perses.git@protocol-neqcyc diff --git a/devtools/conda-envs/alchemiscale-server.yml b/devtools/conda-envs/alchemiscale-server.yml index 26759cb4..8dee891b 100644 --- a/devtools/conda-envs/alchemiscale-server.yml +++ 
b/devtools/conda-envs/alchemiscale-server.yml @@ -46,5 +46,5 @@ dependencies: - pip: - async_lru - - git+https://github.com/openforcefield/alchemiscale.git@v0.3.0 + - git+https://github.com/openforcefield/alchemiscale.git@v0.4.0 - git+https://github.com/choderalab/perses.git@protocol-neqcyc diff --git a/docker/alchemiscale-server/docker-compose.yml b/docker/alchemiscale-server/docker-compose.yml index a2bfe55c..a9a7e9e6 100644 --- a/docker/alchemiscale-server/docker-compose.yml +++ b/docker/alchemiscale-server/docker-compose.yml @@ -8,7 +8,7 @@ networks: services: neo4j: - image: ${NEO4J_DOCKER_IMAGE:-neo4j:5.16} + image: ${NEO4J_DOCKER_IMAGE:-neo4j:5.18} hostname: neo4j # Service-level network, which specifies the networks, from the list of the top-level networks (in this case only neo4j-internal), that the server will connect to. # Adds a network alias (used in neo4j.conf when configuring the discovery members) diff --git a/docs/conf.py b/docs/conf.py index 5a7c3348..895c4f15 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,10 +51,10 @@ ] intersphinx_mapping = { - 'gufe': ('https://gufe.readthedocs.io/en/latest/', None), - 'openfe': ('https://docs.openfree.energy/en/stable/', None), - 'perses': ('https://perses.readthedocs.io/en/latest/', None), - 'python': ('https://docs.python.org/3', None), + "gufe": ("https://gufe.readthedocs.io/en/latest/", None), + "openfe": ("https://docs.openfree.energy/en/stable/", None), + "perses": ("https://perses.readthedocs.io/en/latest/", None), + "python": ("https://docs.python.org/3", None), } # -- Options for HTML output ------------------------------------------------- diff --git a/docs/operations.rst b/docs/operations.rst index 7626c454..1d4d073a 100644 --- a/docs/operations.rst +++ b/docs/operations.rst @@ -1,23 +1,25 @@ ########## Operations ########## +After deploying an ``alchemiscale`` instance, it is necessary to manage the instance, especially the state maintained in its state store. 
+This document details common operations an administrator may need to perform over the life of the instance to keep it in good working order. -********* -Add users -********* +************ +Adding users +************ To add a new user identity, you will generally use the ``alchemiscale`` CLI:: - $ export NEO4J_URL=bolt://:7687 - $ export NEO4J_USER= - $ export NEO4J_PASS= - $ - $ # add a user identity, with key - $ alchemiscale identity add -t user -i -k - $ - $ # add one or more scopes the user should have access to - $ alchemiscale identity add-scope -t user -i -s -s ... + export NEO4J_URL=bolt://:7687 + export NEO4J_USER= + export NEO4J_PASS= + + # add a user identity, with key + alchemiscale identity add -t user -i -k + + # add one or more scopes the user should have access to + alchemiscale identity add-scope -t user -i -s -s ... To add a new compute identity, perform the same operation as for user identities given above, **but replace ``-t user`` with ``-t compute``**. Compute identities are needed by compute services to authenticate with and use the compute API. @@ -28,14 +30,16 @@ Compute identities are needed by compute services to authenticate with and use t For a ``docker-compose``-based deployment, it is easiest to do the above using the same ``alchemiscale-server`` image the API services are deployed with:: - $ docker run --rm -it --network docker_db -e NEO4J_URL=bolt://neo4j:7687 -e NEO4J_USER= -e NEO4J_PASS= \ - identity add -t user \ - -i \ - -k - $ docker run --rm -it --network docker_db -e NEO4J_URL=bolt://neo4j:7687 -e NEO4J_USER= -e NEO4J_PASS= \ - identity add-scope -t user \ - -i \ - -s -s ... + docker run --rm -it --network docker_db -e NEO4J_URL=bolt://neo4j:7687 -e NEO4J_USER= -e NEO4J_PASS= \ + \ + identity add -t user \ + -i \ + -k + docker run --rm -it --network docker_db -e NEO4J_URL=bolt://neo4j:7687 -e NEO4J_USER= -e NEO4J_PASS= \ + \ + identity add-scope -t user \ + -i \ + -s -s ... 
The important bits here are: @@ -47,9 +51,9 @@ The important bits here are: These should match the values set in ``.env``. -******* -Backups -******* +************************************* +Performing and restoring from backups +************************************* Performing regular backups of the state store is an important operational component for any production deployment of ``alchemiscale``. To do this, **first shut down the Neo4j service so that no database processes are currently running**. @@ -57,6 +61,9 @@ To do this, **first shut down the Neo4j service so that no database processes ar The instructions below assume a Docker-based deployment, perhaps via ``docker-compose`` as in :ref:`deploy-docker-compose`. The same general principles apply to any deployment type, however. + +.. _database-dump: + Creating a database dump ======================== @@ -89,3 +96,69 @@ To later restore from a database dump, navigate to the directory containing your You may need to perform a ``chown -R`` following this operation to set correct ownership of the newly-loaded database contents. Automating the backup process to perform regular backups without human intervention for your deployment is ideal, but out of scope for this document. + + +********************************** +Performing upgrades and migrations +********************************** +In most cases, upgrading an ``alchemiscale`` instance to a new ``alchemiscale`` release only requires re-deployment of the API and compute services with a Docker image corresponding to that new release, and informing your users to upgrade their client environments to the latest release as well. +In other cases, a migration may need to be performed on the state and/or object store to reflect schema changes, or to upgrade the state store itself to a newer version of ``neo4j``. + +This section gives specific guidance for ``alchemiscale`` release upgrades, in particular migration steps. 
+ +v0.3 to v0.4 +============ +``alchemiscale`` v0.4 introduced a ``NetworkMark`` node and relationship for each ``AlchemicalNetwork``, supporting the concept of network state. +This change requires a migration on the state store. +In addition, ``alchemiscale`` v0.4 is the first release to use ``neo4j`` 5.x, requiring a migration of existing database data from ``neo4j`` 4.x. + +The instructions below assume a ``docker-compose``-based deployment; follow them in order to complete the data migration. + +Migrate data from ``neo4j`` 4.4 to 5.18 +--------------------------------------- +1. Shut down your ``alchemiscale`` instance, including ``neo4j``. Perform a database dump as detailed above in :ref:`database-dump`. + +2. Rename this dump to ``neo4j.dump``. + +3. Delete the contents of the directory containing your database data; this directory contains a file called ``server_id``. + +4. Load the dump using ``neo4j`` 5.18; ``$BACKUPS_DIR`` should be set from the database dump performed in step 1:: + + export NEO4J_VERSION=5.18 + docker run --rm \ + -v $(pwd):/var/lib/neo4j/data \ + -v ${BACKUPS_DIR}:/tmp \ + --entrypoint /bin/bash \ + neo4j:${NEO4J_VERSION} \ + -c "neo4j-admin database load --from-path=/tmp neo4j" + +5. Migrate the loaded database from ``neo4j`` 4.x to 5.x:: + + export NEO4J_VERSION=5.18 + docker run --rm \ + -v $(pwd):/var/lib/neo4j/data \ + -v ${BACKUPS_DIR}:/tmp \ + --entrypoint /bin/bash \ + neo4j:${NEO4J_VERSION} \ + -c "neo4j-admin database migrate --force-btree-indexes-to-range neo4j" + +6. If necessary, perform a ``chown -R`` following this operation on the database data directory to set correct ownership of the newly-loaded database contents. + + +Migrate schema from ``alchemiscale`` 0.3 to 0.4 +----------------------------------------------- +1. Set the env variable ``NEO4J_DOCKER_IMAGE=neo4j:5.18`` in your ``.env`` file for your ``docker-compose`` deployment. + +2. 
Start up the ``neo4j`` service only:: + + USER_ID=$(id -u) GROUP_ID=$(id -g) docker-compose up neo4j + +3. In another shell on the same host, perform the ``alchemiscale`` schema migration:: + + docker run --rm -it --network docker_db -e NEO4J_URL=bolt://neo4j:7687 -e NEO4J_USER= -e NEO4J_PASS= \ + ghcr.io/openforcefield/alchemiscale-server:v0.4.0 \ + database migrate v03-to-v04 + +4. Shut down the ``neo4j`` service (``Ctrl+C`` of running instance in step 2), then bring up the full set of services:: + + USER_ID=$(id -u) GROUP_ID=$(id -g) docker-compose up -d diff --git a/docs/user_guide.rst b/docs/user_guide.rst index ea1d621f..9a098e08 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -22,7 +22,7 @@ If this doesn’t work, clone alchemiscale from Github, and install from there:: $ git clone https://github.com/openforcefield/alchemiscale.git $ cd alchemiscale - $ git checkout v0.3.0 + $ git checkout v0.4.0 $ conda env create -f devtools/conda-envs/alchemiscale-client.yml