Merge pull request #66 from NDoering99/main

Update API documentation, description errors in openmmdl and format w…
wolberlab · Jan 26, 2024 · 0a5bf50 · 0a5bf50
2 parents d2ff963 + 007c4ad
commit 0a5bf50
Show file tree

Hide file tree

Showing 34 changed files with 5,228 additions and 2,262 deletions.
diff --git a/docs/openmmdl_analysis_functions.rst b/docs/openmmdl_analysis_functions.rst
diff --git a/openmmdl/_version.py b/openmmdl/_version.py
@@ -1 +1 @@
-__version__ = "1.0.0+648.gd517f79.dirty"
+__version__ = "1.0.0+677.ge4e8207.dirty"
diff --git a/openmmdl/openmmdl_analysis/barcode_generation.py b/openmmdl/openmmdl_analysis/barcode_generation.py
@@ -185,7 +185,7 @@ def plot_waterbridge_piechart(df_all, waterbridge_barcodes, waterbridge_interact
         )
 
 
-def plot_bacodes_grouped(interactions, df_all, interaction_type, peptide=False):
+def plot_bacodes_grouped(interactions, df_all, interaction_type):
     """generates barcode figures and groups them by ligand atom, as well as total interaction barcode for a given ligand atom.
 
     Args:

diff --git a/openmmdl/openmmdl_analysis/binding_mode_processing.py b/openmmdl/openmmdl_analysis/binding_mode_processing.py
@@ -271,7 +271,7 @@ def filtering_values(threshold, frames, df, unique_columns_rings_grouped):
         unique_columns_rings_grouped (dict): Dictionary containing the grouped and unique values obtained from gather_interactions.
 
     Returns:
-        dict: A dictionary with a single key named 'all' that contains a list of all combined values from all the sub-dictionaries.
+        list: A list of values, with unique values and their corresponding occurrence counts.
     """
     # Call the function to remove duplicate keys
     unique_data = remove_duplicate_values(unique_columns_rings_grouped)

diff --git a/openmmdl/openmmdl_analysis/find_stable_waters.py b/openmmdl/openmmdl_analysis/find_stable_waters.py
@@ -11,8 +11,8 @@
 def trace_waters(topology, trajectory, output_directory):
     """trace the water molecules in a trajectory and write all which move below one Angstrom distance. To adjust the distance alter the integer
     Args:
-        topology (pdb_file_name): Path to the topology file.
-        trajectory (dcd_file_name): Path to the trajectory file.
+        topology (str): Path to the topology file.
+        trajectory (str): Path to the trajectory file.
         output_directory (str): Directory where output files will be saved.
 
     Returns:
@@ -96,14 +96,13 @@ def perform_clustering_and_writing(
     stable_waters, cluster_eps, total_frames, output_directory
 ):
     """
+    Perform DBSCAN clustering on the stable water coordinates, and write the clusters and their representatives to PDB files.
+
     Args:
         stable_waters (pd.DataFrame): DataFrame containing stable water coordinates.
         cluster_eps (float): DBSCAN clustering epsilon parameter. This is in Angstrom in this case, and defines which Water distances should be within one cluster
         total_frames (int): Total number of frames.
         output_directory (str): Directory where output files will be saved.
-
-    Returns:
-        None, it writes files.
     """
     # Feature extraction: XYZ coordinates
     X = stable_waters[["Oxygen_X", "Oxygen_Y", "Oxygen_Z"]]
@@ -136,13 +135,13 @@ def write_pdb_clusters_and_representatives(
     clustered_waters, min_samples, output_sub_directory
 ):
     """
+    Writes the clusters and their representatives to PDB files.
+
     Args:
         clustered_waters (pd.DataFrame): DataFrame containing clustered water coordinates.
         min_samples (int): Minimum number of samples for DBSCAN clustering.
         output_sub_directory (str): Subdirectory where output PDB files will be saved.
 
-    Returns:
-        None, it will output PDB files.
     """
     atom_counter = 1
     pdb_file_counter = 1
@@ -187,13 +186,11 @@ def stable_waters_pipeline(
 ):
     """Function to run the pipeline to extract stable water clusters, and their representatives from a PDB & DCD file
     Args:
-        topology (PDB_file_name): Path to the topology file.
-        trajectory (DCD_file_name): Path to the trajectory file.
+        topology (str): Path to the topology file.
+        trajectory (str): Path to the trajectory file.
         water_eps (float): DBSCAN clustering epsilon parameter.
         output_directory (str, optional): Directory where output files will be saved. Default is "./stableWaters".
 
-    Returns:
-        None, it starts the pipeline which will create output files.
     """
     # Load the PDB and DCD files
     output_directory += "_clusterEps_"
@@ -211,10 +208,10 @@ def stable_waters_pipeline(
 def filter_and_parse_pdb(protein_pdb):
     """This function reads in a PDB and returns the structure with bioparser.
     Args:
-        protein_pdb (PDB_file_path): Path to a protein PDB file.
+        protein_pdb (str): Path to a protein PDB file.
 
     Returns:
-        Structure: PDB structure object.
+        biopython.structure: PDB structure object.
     """
     with open(protein_pdb, "r") as pdb_file:
         lines = [
@@ -243,8 +240,8 @@ def filter_and_parse_pdb(protein_pdb):
 def find_interacting_residues(structure, representative_waters, distance_threshold):
     """This function maps waters (e.g. the representative waters) to interacting residues of a different PDB structure input. Use "filter_and_parse_pdb" to get the input for this function
     Args:
-        structure (Structure): PDB structure object.
-        representative_waters (pd.DataFrame): DataFrame containing representative water coordinates.
+        structure (biopython.structure): Biopython PDB structure object.
+        representative_waters (pandas.DataFrame): DataFrame containing representative water coordinates.
         distance_threshold (float): Threshold distance for identifying interacting residues.
 
     Returns:
@@ -289,7 +286,7 @@ def read_pdb_as_dataframe(pdb_file):
         pdb_file (str): Path to the PDB file.
 
     Returns:
-        pd.DataFrame: DataFrame containing PDB data.
+        pandas.DataFrame: DataFrame containing PDB data.
     """
     lines = []
     with open(pdb_file, "r") as f:
@@ -326,9 +323,6 @@ def analyze_protein_and_water_interaction(
         cluster_eps (float): DBSCAN clustering epsilon parameter.
         output_directory (str, optional): Directory where output files will be saved. Default is "./stableWaters".
         distance_threshold (float, optional): Threshold distance for identifying interacting residues. Default is 5.0 (Angstrom).
-
-    Returns:
-        None, it will write a csv file.
     """
     output_directory += "_clusterEps_"
     strEps = str(cluster_eps).replace(".", "")

diff --git a/openmmdl/openmmdl_analysis/interaction_gathering.py b/openmmdl/openmmdl_analysis/interaction_gathering.py
@@ -164,7 +164,7 @@ def change_lig_to_residue(file_path, old_residue_name, new_residue_name):
     Args:
         file_path (str): Filepath of the topology file.
         old_residue_name (str): Residue name of the ligand.
-        new_residue_name (str): New residue name of the ligand now changed to mimic a residue.
+        new_residue_name (str): New residue name of the ligand now changed to mimic an amino acid residue.
     """
     with open(file_path, "r") as file:
         lines = file.readlines()
@@ -371,7 +371,7 @@ def process_trajectory(
 
     Args:
         pdb_md (mda universe): MDAnalysis Universe class representation of the topology and the trajectory of the file that is being processed.
-        dataframe (pandas dataframe): Name of a CSV file as str, where the interaction data will be read from if not None.
+        dataframe (str): Name of a CSV file as str, where the interaction data will be read from if not None.
         num_processes (int): The number of CPUs that will be used for the processing of the protein-ligand trajectory. Defaults to half of the CPUs in the system.
         lig_name (str): Name of the Ligand in the complex that will be analyzed.
         special_ligand (str): Name of the special ligand in the complex that will be analyzed.

diff --git a/openmmdl/openmmdl_analysis/markov_state_figure_generation.py b/openmmdl/openmmdl_analysis/markov_state_figure_generation.py
@@ -24,7 +24,7 @@ def min_transition_calculation(min_transition):
 
 
 def binding_site_markov_network(
-    total_frames, min_transitions, combined_dict, font_size=None, size_node=None
+    total_frames, min_transitions, combined_dict, font_size=12, size_node=200
 ):
     """Generate Markov Chain plots based on transition probabilities.
 

diff --git a/openmmdl/openmmdl_analysis/openmmdlanalysis.py b/openmmdl/openmmdl_analysis/openmmdlanalysis.py
@@ -2,6 +2,7 @@
 openmmdl_simulation.py
 Perform Simulations of Protein-ligand complexes with OpenMM
 """
+
 import argparse
 import sys
 import warnings
@@ -457,9 +458,9 @@ def main():
 
         # Check if the fingerprint has been encountered before
         if fingerprint in treshold_fingerprint_dict:
-            grouped_frames_treshold.at[
-                index, "Binding_fingerprint_treshold"
-            ] = treshold_fingerprint_dict[fingerprint]
+            grouped_frames_treshold.at[index, "Binding_fingerprint_treshold"] = (
+                treshold_fingerprint_dict[fingerprint]
+            )
         else:
             # Assign a new label if the fingerprint is new
             label = f"Binding_Mode_{label_counter}"

diff --git a/openmmdl/openmmdl_analysis/pml_writer.py b/openmmdl/openmmdl_analysis/pml_writer.py
@@ -12,7 +12,7 @@ def generate_pharmacophore_centers(df, interactions):
         interactions (list): list of interactions to generate pharmacophore from
 
     Returns:
-        dict: interaction from wicht pharmacophore is generated as key and list of coordinates as value
+        dict: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value
     """
     coord_pattern = re.compile(r"\(([\d.-]+), ([\d.-]+), ([\d.-]+)\)")
     pharmacophore = {}
@@ -44,7 +44,7 @@ def generate_pharmacophore_vectors(df, interactions):
         interactions (list): list of interactions to generate pharmacophore from
 
     Returns:
-        dict: interaction from wicht pharmacophore is generated as key and list of coordinates as value (first coords are ligand side, second are protein side)
+        dict: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value (first coords are ligand side, second are protein side)
     """
     coord_pattern = re.compile(r"\(([\d.-]+), ([\d.-]+), ([\d.-]+)\)")
     pharmacophore = {}
@@ -409,7 +409,7 @@ def generate_pharmacophore_centers_all_points(df, interactions):
         interactions (list): list of interactions to generate pharmacophore from
 
     Returns:
-        dict: interaction from which pharmacophore is generated as key and list of coordinates as value
+        dict: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value
     """
     coord_pattern = re.compile(r"\(([\d.-]+), ([\d.-]+), ([\d.-]+)\)")
     pharmacophore = {}

diff --git a/openmmdl/openmmdl_analysis/preprocessing.py b/openmmdl/openmmdl_analysis/preprocessing.py
@@ -15,7 +15,7 @@ def increase_ring_indices(ring, lig_index):
 
     Args:
         ring (str): A list of atom indices belonging to a ring that need to be modified.
-        lig_index (str): An integer that is the first number of the ligand atom indices obtained from the protein-ligand, which is used to modify the ring indices
+        lig_index (int): An integer that is the first number of the ligand atom indices obtained from the protein-ligand, which is used to modify the ring indices
 
     Returns:
         list: A new list with modified atom indices.
@@ -68,11 +68,11 @@ def process_pdb_file(input_pdb_filename):
 
 
 def extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_resname):
-    """Extract and save the ligand from the receptor ligand complex PDB file into a new PDB file by itself .
+    """Extract and save the ligand from the receptor ligand complex PDB file into a new SDF file by itself.
 
     Args:
         input_pdb_filename (str): name of the input PDB file
-        output_pdb_filename (str): name of the output PDB file
+        output_filename (str): name of the output SDF file
         target_resname (str): resname of the ligand in the original PDB file
     """
     # Load the PDB file using MDAnalysis
@@ -98,30 +98,13 @@ def extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_r
     os.remove("lig.pdb")
 
 
-def convert_pdb_to_sdf(input_pdb_filename, output_sdf_filename):
-    """Convert ligand PDB file to SDF file for analysis using Open Babel.
-
-    Args:
-        input_pdb_filename (str): name of the input PDB file
-        output_sdf_filename (str): name of the output SDF file
-    """
-    # Use subprocess to call Open Babel for the file format conversion
-    try:
-        subprocess.run(
-            ["obabel", input_pdb_filename, "-O", output_sdf_filename], check=True
-        )
-    except subprocess.CalledProcessError as e:
-        print(f"Error converting PDB to SDF: {e}")
-        return
-
-
 def renumber_atoms_in_residues(input_pdb_file, output_pdb_file, lig_name):
     """Renumber the atoms of the ligand in the topology PDB file.
 
     Args:
         input_pdb_file (str): Path to the initial PDB file.
         output_pdb_file (str): Path to the output PDB file.
-        lig_name (str): Name of the ligand in the PDB file.
+        lig_name (str): Name of the ligand in the input PDB file.
     """
     # Read the input PDB file
     with open(input_pdb_file, "r") as f:
@@ -202,7 +185,7 @@ def move_hydrogens_to_end(structure, target_residue_name):
     """Moves hydrogens to the last lines of the residue in the PDB file.
 
     Args:
-        structure (Biopython structure): Structure object containing the PDB file.
+        structure (Bio.structure): Structure object containing the PDB file.
         target_residue_name (str): Name of the residue in the PDB file.
     """
     # Counter for atom numbering within each residue

diff --git a/openmmdl/openmmdl_analysis/rdkit_figure_generation.py b/openmmdl/openmmdl_analysis/rdkit_figure_generation.py
@@ -347,7 +347,7 @@ def create_and_merge_images(
         merged_image_paths (list): A list with the paths to the rdkit figures.
 
     Returns:
-        _type_: _description_
+        list: Paths to the merged images.
     """
     # Create the main figure and axis
     fig = pylab.figure()
@@ -440,8 +440,8 @@ def arranged_figure_generation(merged_image_paths, output_path):
     """Generate an arranged figure by arranging merged images in rows and columns.
 
     Args:
-        merged_image_paths (str): Paths of the merged images with the rdkit figure and legend.
-        output_path (dict): The path where the arranged output should be saved.
+        merged_image_paths (list): Paths of the merged images with the rdkit figure and legend.
+        output_path (dict): The paths where the arranged output should be saved.
     """
     # Open the list of images
     merged_images = [Image.open(path) for path in merged_image_paths]

diff --git a/openmmdl/openmmdl_analysis/visualization_functions.py b/openmmdl/openmmdl_analysis/visualization_functions.py
@@ -197,7 +197,7 @@ def visualization(
         width (str, optional): width of the visualization. Defaults to '1000px'.
 
     Returns:
-        nglview widget: returns the nglview widget containing the visualization
+        nglview widget: returns an nglview.widget object containing the visualization
     """
     with open("clouds.json") as f:
         data = json.load(f)
@@ -254,7 +254,7 @@ def visualization(
 
 
 def run_visualization():
-    """Runs the visualization notebook in the current directory. The notebook is copied from the package directory to the current directory."""
+    """Runs the visualization notebook in the current directory. The visualization notebook is copied from the package directory to the current directory and automatically started."""
     package_dir = os.path.dirname(__file__)
     notebook_path = os.path.join(package_dir, "visualization.ipynb")
     current_dir = os.getcwd()

diff --git a/openmmdl/openmmdl_setup/__init__.py b/openmmdl/openmmdl_setup/__init__.py
@@ -1,2 +1 @@
 
-