Skip to content

Commit

Permalink
Merge pull request #66 from NDoering99/main
Browse files Browse the repository at this point in the history
Update API documentation, discription errors in openmmdl and format w…
  • Loading branch information
talagayev authored Jan 26, 2024
2 parents d2ff963 + 007c4ad commit 0a5bf50
Show file tree
Hide file tree
Showing 34 changed files with 5,228 additions and 2,262 deletions.
694 changes: 690 additions & 4 deletions docs/openmmdl_analysis_functions.rst

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion openmmdl/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0.0+648.gd517f79.dirty"
__version__ = "1.0.0+677.ge4e8207.dirty"
2 changes: 1 addition & 1 deletion openmmdl/openmmdl_analysis/barcode_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def plot_waterbridge_piechart(df_all, waterbridge_barcodes, waterbridge_interact
)


def plot_bacodes_grouped(interactions, df_all, interaction_type, peptide=False):
def plot_bacodes_grouped(interactions, df_all, interaction_type):
"""Generates barcode figures and groups them by ligand atom, as well as the total interaction barcode for a given ligand atom.
Args:
Expand Down
2 changes: 1 addition & 1 deletion openmmdl/openmmdl_analysis/binding_mode_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def filtering_values(threshold, frames, df, unique_columns_rings_grouped):
unique_columns_rings_grouped (dict): Dictionary containing the grouped and unique values obtained from gather_interactions.
Returns:
dict: A dictionary with a single key named 'all' that contains a list of all combined values from all the sub-dictionaries.
list: A list of values, with unique values and their corresponding occurrence counts.
"""
# Call the function to remove duplicate keys
unique_data = remove_duplicate_values(unique_columns_rings_grouped)
Expand Down
32 changes: 13 additions & 19 deletions openmmdl/openmmdl_analysis/find_stable_waters.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
def trace_waters(topology, trajectory, output_directory):
"""trace the water molecules in a trajectory and write all which move below one Angstrom distance. To adjust the distance alter the integer
Args:
topology (pdb_file_name): Path to the topology file.
trajectory (dcd_file_name): Path to the trajectory file.
topology (str): Path to the topology file.
trajectory (str): Path to the trajectory file.
output_directory (str): Directory where output files will be saved.
Returns:
Expand Down Expand Up @@ -96,14 +96,13 @@ def perform_clustering_and_writing(
stable_waters, cluster_eps, total_frames, output_directory
):
"""
Perform DBSCAN clustering on the stable water coordinates, and write the clusters and their representatives to PDB files.
Args:
stable_waters (pd.DataFrame): DataFrame containing stable water coordinates.
cluster_eps (float): DBSCAN clustering epsilon parameter. This is in Angstrom in this case, and defines which Water distances should be within one cluster
total_frames (int): Total number of frames.
output_directory (str): Directory where output files will be saved.
Returns:
None, it writes files.
"""
# Feature extraction: XYZ coordinates
X = stable_waters[["Oxygen_X", "Oxygen_Y", "Oxygen_Z"]]
Expand Down Expand Up @@ -136,13 +135,13 @@ def write_pdb_clusters_and_representatives(
clustered_waters, min_samples, output_sub_directory
):
"""
Writes the clusters and their representatives to PDB files.
Args:
clustered_waters (pd.DataFrame): DataFrame containing clustered water coordinates.
min_samples (int): Minimum number of samples for DBSCAN clustering.
output_sub_directory (str): Subdirectory where output PDB files will be saved.
Returns:
None, it will output PDB files.
"""
atom_counter = 1
pdb_file_counter = 1
Expand Down Expand Up @@ -187,13 +186,11 @@ def stable_waters_pipeline(
):
"""Function to run the pipeline to extract stable water clusters, and their representatives from a PDB & DCD file
Args:
topology (PDB_file_name): Path to the topology file.
trajectory (DCD_file_name): Path to the trajectory file.
topology (str): Path to the topology file.
trajectory (str): Path to the trajectory file.
water_eps (float): DBSCAN clustering epsilon parameter.
output_directory (str, optional): Directory where output files will be saved. Default is "./stableWaters".
Returns:
None, it starts the pipeline which will create output files.
"""
# Load the PDB and DCD files
output_directory += "_clusterEps_"
Expand All @@ -211,10 +208,10 @@ def stable_waters_pipeline(
def filter_and_parse_pdb(protein_pdb):
"""This function reads in a PDB and returns the structure with bioparser.
Args:
protein_pdb (PDB_file_path): Path to a protein PDB file.
protein_pdb (str): Path to a protein PDB file.
Returns:
Structure: PDB structure object.
biopython.structure: PDB structure object.
"""
with open(protein_pdb, "r") as pdb_file:
lines = [
Expand Down Expand Up @@ -243,8 +240,8 @@ def filter_and_parse_pdb(protein_pdb):
def find_interacting_residues(structure, representative_waters, distance_threshold):
"""This function maps waters (e.g. the representative waters) to interacting residues of a different PDB structure input. Use "filter_and_parse_pdb" to get the input for this function
Args:
structure (Structure): PDB structure object.
representative_waters (pd.DataFrame): DataFrame containing representative water coordinates.
structure (biopython.structure): Biopython PDB structure object.
representative_waters (pandas.DataFrame): DataFrame containing representative water coordinates.
distance_threshold (float): Threshold distance for identifying interacting residues.
Returns:
Expand Down Expand Up @@ -289,7 +286,7 @@ def read_pdb_as_dataframe(pdb_file):
pdb_file (str): Path to the PDB file.
Returns:
pd.DataFrame: DataFrame containing PDB data.
pandas.DataFrame: DataFrame containing PDB data.
"""
lines = []
with open(pdb_file, "r") as f:
Expand Down Expand Up @@ -326,9 +323,6 @@ def analyze_protein_and_water_interaction(
cluster_eps (float): DBSCAN clustering epsilon parameter.
output_directory (str, optional): Directory where output files will be saved. Default is "./stableWaters".
distance_threshold (float, optional): Threshold distance for identifying interacting residues. Default is 5.0 (Angstrom).
Returns:
None, it will write a csv file.
"""
output_directory += "_clusterEps_"
strEps = str(cluster_eps).replace(".", "")
Expand Down
4 changes: 2 additions & 2 deletions openmmdl/openmmdl_analysis/interaction_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def change_lig_to_residue(file_path, old_residue_name, new_residue_name):
Args:
file_path (str): Filepath of the topology file.
old_residue_name (str): Residue name of the ligand.
new_residue_name (str): New residue name of the ligand now changed to mimic a residue.
new_residue_name (str): New residue name of the ligand now changed to mimic an amino acid residue.
"""
with open(file_path, "r") as file:
lines = file.readlines()
Expand Down Expand Up @@ -371,7 +371,7 @@ def process_trajectory(
Args:
pdb_md (mda universe): MDAnalysis Universe class representation of the topology and the trajectory of the file that is being processed.
dataframe (pandas dataframe): Name of a CSV file as str, where the interaction data will be read from if not None.
dataframe (str): Name of a CSV file as str, where the interaction data will be read from if not None.
num_processes (int): The number of CPUs that will be used for the processing of the protein-ligand trajectory. Defaults to half of the CPUs in the system.
lig_name (str): Name of the Ligand in the complex that will be analyzed.
special_ligand (str): Name of the special ligand in the complex that will be analyzed.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def min_transition_calculation(min_transition):


def binding_site_markov_network(
total_frames, min_transitions, combined_dict, font_size=None, size_node=None
total_frames, min_transitions, combined_dict, font_size=12, size_node=200
):
"""Generate Markov Chain plots based on transition probabilities.
Expand Down
7 changes: 4 additions & 3 deletions openmmdl/openmmdl_analysis/openmmdlanalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
openmmdl_simulation.py
Perform Simulations of Protein-ligand complexes with OpenMM
"""

import argparse
import sys
import warnings
Expand Down Expand Up @@ -457,9 +458,9 @@ def main():

# Check if the fingerprint has been encountered before
if fingerprint in treshold_fingerprint_dict:
grouped_frames_treshold.at[
index, "Binding_fingerprint_treshold"
] = treshold_fingerprint_dict[fingerprint]
grouped_frames_treshold.at[index, "Binding_fingerprint_treshold"] = (
treshold_fingerprint_dict[fingerprint]
)
else:
# Assign a new label if the fingerprint is new
label = f"Binding_Mode_{label_counter}"
Expand Down
6 changes: 3 additions & 3 deletions openmmdl/openmmdl_analysis/pml_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def generate_pharmacophore_centers(df, interactions):
interactions (list): list of interactions to generate pharmacophore from
Returns:
dict: interaction from wicht pharmacophore is generated as key and list of coordinates as value
dict: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value
"""
coord_pattern = re.compile(r"\(([\d.-]+), ([\d.-]+), ([\d.-]+)\)")
pharmacophore = {}
Expand Down Expand Up @@ -44,7 +44,7 @@ def generate_pharmacophore_vectors(df, interactions):
interactions (list): list of interactions to generate pharmacophore from
Returns:
dict: interaction from wicht pharmacophore is generated as key and list of coordinates as value (first coords are ligand side, second are protein side)
dict: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value (first coords are ligand side, second are protein side)
"""
coord_pattern = re.compile(r"\(([\d.-]+), ([\d.-]+), ([\d.-]+)\)")
pharmacophore = {}
Expand Down Expand Up @@ -409,7 +409,7 @@ def generate_pharmacophore_centers_all_points(df, interactions):
interactions (list): list of interactions to generate pharmacophore from
Returns:
dict: interaction from which pharmacophore is generated as key and list of coordinates as value
dict: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value
"""
coord_pattern = re.compile(r"\(([\d.-]+), ([\d.-]+), ([\d.-]+)\)")
pharmacophore = {}
Expand Down
27 changes: 5 additions & 22 deletions openmmdl/openmmdl_analysis/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def increase_ring_indices(ring, lig_index):
Args:
ring (str): A list of atom indices belonging to a ring that need to be modified.
lig_index (str): An integer that is the first number of the ligand atom indices obtained from the protein-ligand, which is used to modify the ring indices
lig_index (int): An integer that is the first number of the ligand atom indices obtained from the protein-ligand, which is used to modify the ring indices
Returns:
list: A new list with modified atom indicies.
Expand Down Expand Up @@ -68,11 +68,11 @@ def process_pdb_file(input_pdb_filename):


def extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_resname):
"""Extract and save the ligand from the receptor ligand complex PDB file into a new PDB file by itself .
"""Extract and save the ligand from the receptor ligand complex PDB file into a new PDB file by itself.
Args:
input_pdb_filename (str): name of the input PDB file
output_pdb_filename (str): name of the output PDB file
output_filename (str): name of the output SDF file
target_resname (str): resname of the ligand in the original PDB file
"""
# Load the PDB file using MDAnalysis
Expand All @@ -98,30 +98,13 @@ def extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_r
os.remove("lig.pdb")


def convert_pdb_to_sdf(input_pdb_filename, output_sdf_filename):
    """Convert a ligand PDB file into an SDF file by invoking Open Babel.

    Args:
        input_pdb_filename (str): name of the input PDB file
        output_sdf_filename (str): name of the output SDF file

    Returns:
        None. If the ``obabel`` command exits with a non-zero status, the
        error is printed rather than raised.
    """
    # Assemble the Open Babel command line up front: obabel <input> -O <output>
    obabel_command = ["obabel", input_pdb_filename, "-O", output_sdf_filename]
    try:
        # check=True turns a non-zero exit status into CalledProcessError
        subprocess.run(obabel_command, check=True)
    except subprocess.CalledProcessError as conversion_error:
        print(f"Error converting PDB to SDF: {conversion_error}")


def renumber_atoms_in_residues(input_pdb_file, output_pdb_file, lig_name):
"""Renumber the atoms of the ligand in the topology PDB file.
Args:
input_pdb_file (str): Path to the initial PDB file.
output_pdb_file (str): Path to the output PDB file.
lig_name (str): Name of the ligand in the PDB file.
lig_name (str): Name of the ligand in the input PDB file.
"""
# Read the input PDB file
with open(input_pdb_file, "r") as f:
Expand Down Expand Up @@ -202,7 +185,7 @@ def move_hydrogens_to_end(structure, target_residue_name):
"""Moves hydrogens to the last lines of the residue in the PDB file.
Args:
structure (Biopython structure): Structure object containing the PDB file.
structure (Bio.structure): Structure object containing the PDB file.
target_residue_name (str): Name of the residue in the PDB file.
"""
# Counter for atom numbering within each residue
Expand Down
6 changes: 3 additions & 3 deletions openmmdl/openmmdl_analysis/rdkit_figure_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def create_and_merge_images(
merged_image_paths (list): A list with the paths to the rdkit figures.
Returns:
_type_: _description_
list: Paths to the merged images.
"""
# Create the main figure and axis
fig = pylab.figure()
Expand Down Expand Up @@ -440,8 +440,8 @@ def arranged_figure_generation(merged_image_paths, output_path):
"""Generate an arranged figure by arranging merged images in rows and columns.
Args:
merged_image_paths (str): Paths of the merged images with the rdkit figure and legend.
output_path (dict): The path where the arranged output should be saved.
merged_image_paths (list): Paths of the merged images with the rdkit figure and legend.
output_path (dict): The paths where the arranged output should be saved.
"""
# Open the list of images
merged_images = [Image.open(path) for path in merged_image_paths]
Expand Down
4 changes: 2 additions & 2 deletions openmmdl/openmmdl_analysis/visualization_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def visualization(
width (str, optional): width of the visualization. Defaults to '1000px'.
Returns:
nglview widget: returns the nglview widget containing the visualization
nglview widget: returns an nglview.widget object containing the visualization
"""
with open("clouds.json") as f:
data = json.load(f)
Expand Down Expand Up @@ -254,7 +254,7 @@ def visualization(


def run_visualization():
"""Runs the visualization notebook in the current directory. The notebook is copied from the package directory to the current directory."""
"""Runs the visualization notebook in the current directory. The visualization notebook is copied from the package directory to the current directory and automaticaly started."""
package_dir = os.path.dirname(__file__)
notebook_path = os.path.join(package_dir, "visualization.ipynb")
current_dir = os.getcwd()
Expand Down
1 change: 0 additions & 1 deletion openmmdl/openmmdl_setup/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@


Loading

0 comments on commit 0a5bf50

Please sign in to comment.