Skip to content

Commit

Permalink
Update ABFE workflows
Browse files Browse the repository at this point in the history
- Avoid running in parallel, as this opens too many
  files/resources
- Set all inputs not to be optional so that we can loop
- Add CLI test
  • Loading branch information
fjclark committed May 3, 2024
1 parent 23e3115 commit bb0f5b9
Show file tree
Hide file tree
Showing 8 changed files with 486 additions and 133 deletions.
95 changes: 64 additions & 31 deletions maize/graphs/exs/biosimspace/abfe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,15 @@
MinimiseGromacs,
Parameterise,
ProductionGromacs,
SaveAFEResults,
SaveAFEResult,
Solvate,
StageType,
)
from maize.steps.exs.biosimspace._utils import IsomerToSDF
from maize.steps.exs.biosimspace._utils import (
IsomerToSDF,
SdfPathtoIsomerList,
make_inputs_required,
)
from maize.steps.io import LoadData, Return
from maize.steps.plumbing import Accumulate, Copy, Scatter
from maize.utilities.chem.chem import Isomer
Expand All @@ -40,9 +44,9 @@
]


class AbsoluteBindingFreeEnergySingleRepeat(Graph):
class AbsoluteBindingFreeEnergy(Graph):
"""
A class for running a single absolute binding free energy calculation
A class for running an absolute binding free energy calculation
using SOMD through BioSimSpace. This requires parameterised and equilibrated
input structures, and performs short production simulations followed by
ABFE calculations.
Expand All @@ -64,15 +68,15 @@ class AbsoluteBindingFreeEnergySingleRepeat(Graph):
# gro87, grotop, prm7, rst rst7
# """

inp_bound: Input[list[Path]] = Input(optional=True)
"""
Paths to equilibrated input files for the bound leg. A
topology and a coordinate file are required. These can
be in any of the formats given by BSS.IO.fileFormats()
e.g.:
gro87, grotop, prm7, rst rst7
"""
# inp_bound: Input[list[Path]] = Input(optional=True)
# """
# Paths to equilibrated input files for the bound leg. A
# topology and a coordinate file are required. These can
# be in any of the formats given by BSS.IO.fileFormats()
# e.g.:

# gro87, grotop, prm7, rst rst7
# """

# Parameters
lam_vals: Parameter[dict[LegType, [StageType, list[float]]]] = Parameter(
Expand Down Expand Up @@ -233,6 +237,7 @@ def build(self) -> None:
"tau_t",
"thermostat_time_constant",
"timestep",
"n_replicates",
]
for param_name in param_names_to_map:
self.combine_parameters(
Expand All @@ -242,6 +247,9 @@ def build(self) -> None:
# Map the overall inputs
self.out = self.map_port(collect_results.out)

# Make all inputs required so that we can do things like looping
make_inputs_required(self, input_names=["inp_bound", "inp_free", "inp"])


class AbsoluteBindingFreeEnergyMultiRepeat(Graph):
"""
Expand All @@ -265,7 +273,7 @@ def build(self) -> None:
# Add all the repeat ABFE nodes
abfe_subgraph = self.add(
parallel(
AbsoluteBindingFreeEnergySingleRepeat,
AbsoluteBindingFreeEnergy,
n_branches=self.n_repeats.value,
inputs=[],
constant_inputs=["inp_bound", "inp_free"],
Expand All @@ -284,12 +292,32 @@ def build(self) -> None:
self.out = self.map_port(accumulate_results.out, name="out")


class AbsoluteBindingFreeEnergyMultiWithPrep(Graph):
class AbsoluteBindingFreeEnergyWithPrep(Graph):
"""
A class for running multiple repeat absolute binding free energy calculations
A class for running absolute binding free energy calculations
from unparameterised input structures.
"""

inp_free: Input[list[Path]] = Input()
"""
Paths to equilibrated input files for the free leg. A
topology and a coordinate file are required. These can
be in any of the formats given by BSS.IO.fileFormats()
e.g.:
gro87, grotop, prm7, rst rst7
"""

inp_bound: Input[list[Path]] = Input()
"""
Paths to equilibrated input files for the bound leg. A
topology and a coordinate file are required. These can
be in any of the formats given by BSS.IO.fileFormats()
e.g.:
gro87, grotop, prm7, rst rst7
"""

def build(self) -> None:

# We need to copy input to the bound and free legs
Expand All @@ -300,10 +328,10 @@ def build(self) -> None:
sys_prep_bound = self.add(SystemPreparationBound, name="SystemPreparationBound")

# Run repeats of ABFE calculations
abfe_calc = self.add(AbsoluteBindingFreeEnergyMultiRepeat, name="AbsoluteBindingFreeEnergy")
abfe_calc = self.add(AbsoluteBindingFreeEnergy, name="AbsoluteBindingFreeEnergy")

# Save the ABFE results
save_results = self.add(SaveAFEResults, name="SaveAFEResults")
save_results = self.add(SaveAFEResult, name="SaveAFEResult")

# Connect the nodes
self.connect(copy_inp.out, sys_prep_free.inp)
Expand Down Expand Up @@ -337,10 +365,13 @@ def build(self) -> None:
# self.combine_parameters(copy_inp.inp, name="lig_sdf_path")
self.inp = self.map_port(copy_inp.inp, name="inp")

# Make inputs required so that we can loop the nodes
make_inputs_required(self, input_names=["inp_free", "inp_bound", "inp"])


class AbsoluteBindingFreeEnergyMultiIsomer(Graph):
"""
A subgraph for running multiple repeat absolute binding free energy calculations
A subgraph for running absolute binding free energy calculations
on a set of isomers.
"""

Expand All @@ -357,15 +388,15 @@ def build(self) -> None:
scatter = self.add(Scatter[Isomer], name="ScatterIsomers")

# Convert the input isomer paths to sdfs
iso_to_sdf_paths = self.add(IsomerToSDF, name="IsomerToSDF", loop=False)
iso_to_sdf_paths = self.add(IsomerToSDF, name="IsomerToSDF", loop=True)

# TODO: Figure out why adding loop=True causes nodes to run with no
# input, even if
# Run the ABFE calculations on each isomer
abfe_calc = self.add(
AbsoluteBindingFreeEnergyMultiWithPrep,
AbsoluteBindingFreeEnergyWithPrep,
name="AbsoluteBindingFreeEnergy",
loop=False,
loop=True,
)

# Return the scored isomers
Expand All @@ -389,16 +420,20 @@ def get_abfe_multi_isomer_workflow() -> Workflow:
of isomers.
"""

flow = Workflow(name="absolute_binding_free_energy_multi_isomer")
flow = Workflow(name="absolute_binding_free_energy_multi_isomer", cleanup_temp=False)

load_data = flow.add(LoadData[list[Isomer]], name="LoadData")
load_data = flow.add(LoadData[Path], name="LoadSdfPath")

# Convert to isomers
sdf_to_isomers = flow.add(SdfPathtoIsomerList, name="SdfPathtoIsomerList")

abfe_multi_isomer = flow.add(
AbsoluteBindingFreeEnergyMultiIsomer, name="AbsoluteBindingFreeEnergy"
)

# Connect the nodes/ subgraphs
flow.connect(load_data.out, abfe_multi_isomer.inp)
flow.connect(load_data.out, sdf_to_isomers.inp)
flow.connect(sdf_to_isomers.out, abfe_multi_isomer.inp)

# Map the inputs/ parameters
flow.map(*abfe_multi_isomer.parameters.values())
Expand All @@ -418,8 +453,8 @@ def get_abfe_no_prep_workflow() -> Workflow:

flow = Workflow(name="absolute_binding_free_energy_no_prep")

abfe_calc = flow.add(AbsoluteBindingFreeEnergyMultiRepeat, name="AbsoluteBindingFreeEnergy")
save_results = flow.add(SaveAFEResults, name="SaveAFEResults")
abfe_calc = flow.add(AbsoluteBindingFreeEnergy, name="AbsoluteBindingFreeEnergy")
save_results = flow.add(SaveAFEResult, name="SaveAFEResult")

# Connect the nodes/ subgraphs
flow.connect(abfe_calc.out, save_results.inp)
Expand All @@ -429,7 +464,7 @@ def get_abfe_no_prep_workflow() -> Workflow:
flow.combine_parameters(abfe_calc.inp_free, name="inp_free")
flow.map(*abfe_calc.parameters.values())
# TODO: Figure out why supplying a default value for the save_results.file parameter
# doesn't work (still needs to be supplied for CLI if no default is given in SaveAFEResults)
# doesn't work (still needs to be supplied for CLI if no default is given in SaveAFEResult)
flow.combine_parameters(save_results.file, name="results_file_name")

return flow
Expand All @@ -446,9 +481,7 @@ def get_abfe_with_prep_workflow() -> Workflow:
flow = Workflow(name="absolute_binding_free_energy")

# Run setup and repeats of ABFE calculations
abfe_with_prep = flow.add(
AbsoluteBindingFreeEnergyMultiWithPrep, name="AbsoluteBindingFreeEnergy"
)
abfe_with_prep = flow.add(AbsoluteBindingFreeEnergyWithPrep, name="AbsoluteBindingFreeEnergy")

# Map the inputs/ parameters
flow.map(*abfe_with_prep.parameters.values())
Expand Down
4 changes: 2 additions & 2 deletions maize/graphs/exs/biosimspace/system_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class _SystemPreparationBase(Graph, ABC):
"""The path to the ligand sdf file."""

# Output
out: Output[list[Path]] = Output(optional=True)
out: Output[tuple[Path, Path]] = Output(optional=True)
"""The output files for the system generated by BioSimSpace, in gromacs format."""

# Parameters
Expand Down Expand Up @@ -223,7 +223,7 @@ def sys_prep_workflow_fn() -> Workflow:
SystemPreparationFree if leg_type == LegType.FREE else SystemPreparationBound
)
system_preparation = flow.add(sys_prep_class, name=sys_prep_class.__name__)
retu = flow.add(Return[list[Path]], name="Return")
retu = flow.add(Return[tuple[Path, Path]], name="Return")

flow.connect(system_preparation.out, retu.inp)

Expand Down
61 changes: 60 additions & 1 deletion maize/steps/exs/biosimspace/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
from pathlib import Path
from typing import Callable

from maize.core.component import Component
from maize.core.interface import Input, Output
from maize.core.node import Node
from maize.core.workflow import Workflow, expose
from maize.steps.io import Return
from maize.utilities.chem import Isomer
from maize.utilities.chem import Isomer, load_sdf_library

from .enums import BSSEngine

Expand Down Expand Up @@ -302,3 +303,61 @@ def run(self) -> None:

# Send the path to the output
self.out.send(sdf_path)


class SdfPathtoIsomerList(Node):
    """
    Read an SDF file and emit its molecules as one flat list of Isomer objects.
    """

    inp: Input[Path] = Input()
    """
    Path to the SDF file to read.
    """

    out: Output[list[Isomer]] = Output()
    """
    Flat list of the Isomer objects loaded from the SDF file.
    """

    def run(self) -> None:
        # Receive the SDF path and load it without applying a split strategy
        collections = load_sdf_library(self.inp.receive(), split_strategy="none")

        # Flatten the molecules of every loaded collection into a single list
        flattened = []
        for collection in collections:
            flattened.extend(collection.molecules)

        # Pass the flattened list downstream
        self.out.send(flattened)


def make_inputs_required(component: Component, input_names: list[str] | None = None) -> None:
    """
    Recursively make all requested inputs required for component and contained
    components.

    Parameters
    ----------
    component : Component
        The component to make inputs required for. A component without an
        ``inputs`` attribute is left untouched; a component with a ``nodes``
        attribute has each of its nodes processed in the same way.
    input_names : list[str], optional
        The names of the inputs to make required. Default is ["inp"].
    """
    # Resolve the default here rather than in the signature: a list literal
    # used as a default value is created once and shared by every call.
    if input_names is None:
        input_names = ["inp"]

    if hasattr(component, "inputs"):
        for input_name in input_names:
            # Names the component does not declare are skipped silently
            if input_name in component.inputs:
                component.inputs[input_name].optional = False

    # Descend into contained components with the same set of names
    if hasattr(component, "nodes"):
        for node in component.nodes.values():
            make_inputs_required(node, input_names)
Loading

0 comments on commit bb0f5b9

Please sign in to comment.