diff --git a/docs/openmmdl_analysis_functions.rst b/docs/openmmdl_analysis_functions.rst index fb3ac797..5ae77232 100644 --- a/docs/openmmdl_analysis_functions.rst +++ b/docs/openmmdl_analysis_functions.rst @@ -3,14 +3,700 @@ OpenMMDL Analysis functions This page displays all the functions of **OpenMMDL Analysis**. -openmmdl_simulation.scripts.cleaning_procedures +openmmdl_analysis.barcode_generation ------------------------------ -.. py:function:: cleanup(protein_name) +.. py:function:: barcodegeneration(df, interaction) - Cleans up the PDB Reporter Output File and MDTraj Files of the performed simulation. + Generates barcodes for a given interaction. :param str protein_name: Name of the protein PDB. + :param pandas.dataframe df: Dataframe containing all interactions from plip analysis (typically df_all) + :param str interaction: name of the interaction to generate a barcode for - :returns: None. + :returns: binary array with 1 representing that the interaction is present in the corresponding frame + :rtype: numpy.array + +.. py:function:: waterids_barcode_generator(df, interaction) + + Generates a barcode containing corresponding water ids for a given interaction. + + :param pandas.dataframe df: Dataframe containing all interactions from plip analysis (typically df_all) + :param str interaction: name of the interaction to generate a barcode for + + :returns: a list of waterids for the frames where the interaction is present, or 0 if no interaction is present + :rtype: list + + +.. py:function:: plot_barcodes(barcodes, save_path) + + Generates picture of barcodes for interactions of a specific type. + + :param list barcodes: list of np arrays containing the barcodes for each interaction + :param str save_path: name of the file to save the picture to + + :returns: None + :rtype: None + +.. 
py:function:: plot_waterbridge_piechart(df_all, waterbridge_barcodes, waterbridge_interactions) + + Generates piecharts for each waterbridge interaction with the water ids of the interacting waters. + + :param pandas.dataframe df_all: Dataframe containing all interactions from plip analysis (typically df_all) + :param list waterbridge_barcodes: list of np arrays containing the barcodes for each waterbridge interaction + :param list waterbridge_interactions: list of strings containing waterbridge interactions + + :returns: None + :rtype: None + +.. py:function:: plot_bacodes_grouped(interactions, df_all, interaction_type) + + Generates barcode figures and groups them by ligand atom, as well as a total interaction barcode for a given ligand atom. + + :param list interactions: list of pandas.indexes that contain the interactions to generate barcodes for + :param pandas.dataframe df_all: Dataframe containing all interactions from plip analysis (typically df_all) + :param str interaction_type: name of the interaction to generate a barcode for + + :returns: None + :rtype: None + +openmmdl_analysis.binding_mode_processing +------------------------------------------ + +.. py:function:: gather_interactions(df, ligand_rings, peptide=None) + + Process a DataFrame with the protein-ligand interaction and generate column names for each unique interaction. + + :param pandas.dataframe df: DataFrame that contains the interaction data for the whole trajectory. + :param list ligand_rings: List of the ligand ring information to recognize the atom numbers belonging to rings for hydrophobic interactions. + :param str peptide: Name of the peptide chain in the protein. If None, the peptide chain is not considered. + + :returns: A dictionary with the keys being 'FRAME' numbers and values being dictionaries containing row indices and their corresponding unique column names for interactions. + :rtype: dict + +.. 
py:function:: remove_duplicate_values(data) + + Remove the duplicate values from sub-dictionaries within the input dictionary. + + :param dict data: The input dictionary containing sub-dictionaries with possible duplicate values. + + :returns: A dictionary without duplicate values. + :rtype: dict + +.. py:function:: combine_subdict_values(data) + + Combines the values from the individual sub-dictionaries into a single list. + + :param dict data: Dictionary with values that are sub-dictionaries. + + :returns: A dictionary with a single key named 'all' that contains a list of all combined values from all the sub-dictionaries. + :rtype: dict + +.. py:function:: filtering_values(threshold, frames, df, unique_columns_rings_grouped) + + Filter and append values (interactions) to a DataFrame based on occurrence counts. + + :param float threshold: A threshold value that is used for filtering of the values (interactions) based upon the occurrence count. + :param int frames: The number of frames that is used to calculate the threshold. + :param pandas.dataframe df: DataFrame to which the filtered values (interactions) will be added. + :param dict unique_columns_rings_grouped: Dictionary containing the grouped and unique values obtained from gather_interactions. + + :returns: A list of values, with unique values and their corresponding occurrence counts. + :rtype: list + +.. py:function:: unique_data_generation(filtered_values) + + :param list filtered_values: A list of values, where the unique interactions are extracted from. + + :returns: A dictionary containing the filtered unique interactions. + :rtype: dict + +.. py:function:: df_iteration_numbering(df, unique_data, peptide=None) + + Loop through the DataFrame and assign the values 1 and 0 to the rows, depending if the corresponding interaction from unique data is present. + + :param pandas.dataframe df: DataFrame which has the interaction data for all of the frames. 
+ :param dict unique_data: Dictionary that contains the unique interactions obtained from unique_data_generation. + :param str peptide: Name of the peptide chainid in the original topology. Defaults to None. If None, the peptide chain is not considered. + + :returns: None + :rtype: None + +.. py:function:: update_values(df, new, unique_data) + + Update the values in the input DataFrame based upon the frame values and a reference DataFrame. + + :param pandas.dataframe df: Input DataFrame that will be updated. + :param pandas.dataframe new: The reference DataFrame containing values that are used to update the input DataFrame. + :param dict unique_data: A dictionary containing keys that represent the specific unique column names that need to be updated in the input DataFrame. + + :returns: None + :rtype: None + +.. py:function:: calculate_representative_frame(traj, bmode_frame_list, lig) + + Calculates the most representative frame for a binding mode. This is based upon the average RMSD of a frame to all other frames in the binding mode. + + :param mdanalysis.universe traj: MDAnalysis universe object containing the trajectory. + :param list bmode_frame_list: List of frames belonging to a binding mode. + :param str lig: Name of the ligand in the topology. + +openmmdl_analysis.find_stable_waters +------------------------------------------ + +.. py:function:: trace_waters(topology, trajectory, output_directory) + + Trace the water molecules in a trajectory and write all which move below one Angstrom distance. To adjust the distance alter the integer + + :param str topology: Path to the topology file. + :param str trajectory: Path to the trajectory file. + :param str output_directory: Path to the output directory. + + :returns: DataFrame containing stable water coordinates. + :rtype: pandas.DataFrame + :returns: Total number of frames. + :rtype: int + +.. 
py:function:: perform_clustering_and_writing(stable_waters, cluster_eps, total_frames, output_directory) + + Perform DBSCAN clustering on the stable water coordinates, and write the clusters and their representatives to PDB files. + + :param pandas.DataFrame stable_waters: DataFrame containing stable water coordinates. + :param float cluster_eps: DBSCAN clustering epsilon parameter. This is in Angstrom in this case, and defines which Water distances should be within one cluster + :param int total_frames: Total number of frames. + :param str output_directory: Path to the output directory. + + :returns: None + :rtype: None + +.. py:function:: write_pdb_clusters_and_representatives(clustered_waters, min_samples, output_sub_directory) + + Writes the clusters and their representatives to PDB files. + + :param pandas.dataframe clustered_waters: DataFrame containing clustered water coordinates. + :param int min_samples: DBSCAN clustering min_samples parameter. + :param str output_sub_directory: Path to the output subdirectory. + + :returns: None + :rtype: None + +.. py:function:: stable_waters_pipeline(topology, trajectory, water_eps, output_directory="./stableWaters") + + Function to run the pipeline to extract stable water clusters, and their representatives from a PDB & DCD file + + :param str topology: Path to the topology file. + :param str trajectory: Path to the trajectory file. + :param float water_eps: DBSCAN clustering epsilon parameter. + :param str output_directory: Path to the output directory. Optional, defaults to "./stableWaters" + + :returns: None + :rtype: None + +.. py:function:: filter_and_parse_pdb(protein_pdb) + + This function reads in a PDB and returns the structure with bioparser. + + :param str protein_pdb: Path to the PDB file. + + :returns: Biopython PDB Structure object. + :rtype: biopython.structure + +.. py:function:: find_interacting_residues(structure, representative_waters, distance_threshold) + + This function maps waters (e.g. 
the representative waters) to interacting residues of a different PDB structure input. Use "filter_and_parse_pdb" to get the input for this function + + :param biopython.structure structure: Biopython PDB Structure object. + :param pandas.dataframe representative_waters: DataFrame containing representative water coordinates. + :param float distance_threshold: Threshold distance for identifying interacting residues. + + :returns: Dictionary mapping cluster numbers to interacting residues. + :rtype: dict + +.. py:function:: read_pdb_as_dataframe(pdb_file) + + Helper function reading a PDB + + :param str pdb_file: Path to the PDB file. + + :returns: DataFrame containing PDB data. + :rtype: pandas.dataframe + +.. py:function:: analyze_protein_and_water_interaction(protein_pdb_file, representative_waters_file, cluster_eps, output_directory="./stableWaters", distance_threshold=5.0,) + + Analyse the interaction of residues to water molecules using a threshold that can be specified when calling the function + + :param str protein_pdb_file: Path to the protein PDB file without waters. + :param str representative_waters_file: Path to the representative waters PDB file, or any PDB file containing only waters + :param float cluster_eps: DBSCAN clustering epsilon parameter. + :param str output_directory: Path to the output directory. Optional, defaults to "./stableWaters" + :param float distance_threshold: Threshold distance for identifying interacting residues. Optional, defaults to 5.0 + + :returns: None + :rtype: None + +openmmdl_analysis.interaction_gathering +------------------------------------------ + +.. py:function:: characterize_complex(pdb_file, binding_site_id) + + Characterize the protein-ligand complex and return their interaction set + + :param str pdb_file: Path to the PDB file. + :param str binding_site_id: A string that specifies the identifier of the binding site + + :returns: An object representing the interactions if the binding site is found. 
If the binding site is not found, returns None + :rtype: plip.pdb_complex.basic.interaction_sets + +.. py:function:: retrieve_plip_interactions(pdb_file, lig_name) + + Retrieves the interactions from PLIP. + + :param str pdb_file: Path to the PDB file. + :param str lig_name: Name of the ligand in the topology. + + :returns: A dictionary of the binding sites and the interactions. + :rtype: dict + +.. py:function:: retrieve_plip_interactions_peptide(pdb_file, peptide) + + Retrieves the interactions from PLIP for a peptide. + + :param str pdb_file: Path to the PDB file. + :param str peptide: Name of the peptide chainid in the original topology. + + :returns: A dictionary of the binding sites and the interactions. + :rtype: dict + +.. py:function:: create_df_from_binding_site(selected_site_interactions, interaction_type="hbond") + + Creates a data frame from a binding site and interaction type. + + :param dict selected_site_interactions: Precalculated interactions from PLIP for the selected site + :param str interaction_type: The interaction type of interest (default set to hydrogen bond). Defaults to "hbond". + + :returns: DataFrame with information retrieved from PLIP. + :rtype: pandas.DataFrame + +.. py:function:: change_lig_to_residue(file_path, old_residue_name, new_residue_name) + + Reformats the topology file to change the ligand to a residue. This is needed for interactions with special ligands such as metal ions. + + :param str file_path: Path to the topology file. + :param str old_residue_name: Name of the ligand in the topology. + :param str new_residue_name: New residue name of the ligand now changed to mimic an amino acid residue. + + :returns: None + :rtype: None + +.. py:function:: process_frame(frame, pdb_md, lig_name, special=None, peptide=None) + + Process a single frame of MD simulation. + + :param int frame: Number of frame to be processed. + :param mdanalysis.universe pdb_md: MDAnalysis universe object containing the trajectory. 
+ :param str lig_name: Name of the ligand in the topology. + :param str special: Name of the special ligand in the topology. Defaults to None. + :param str peptide: Name of the peptide chainid in the original topology. Defaults to None. + + :returns: A dataframe containing the interaction data for the processed frame. + :rtype: pandas.dataframe + +.. py:function:: process_frame_special(frame, pdb_md, lig_name, special=None) + + Function extension of process_frame to process special ligands. + + :param int frame: Number of the frame that will be processed. + :param mdanalysis.universe pdb_md: MDAnalysis universe object containing the trajectory. + :param str lig_name: Name of the ligand in the topology. + :param str special: Name of the special ligand in the topology. Defaults to None. + + :returns: list of dataframes containing the interaction data for the processed frame with the special ligand. + :rtype: list + +.. py:function:: process_frame_wrapper(args) + + Wrapper for the MD Trajectory processing. + + :param tuple args: Tuple containing (frame_idx: int - number of the frame to be processed, + pdb_md: mda.universe - MDA Universe class representation of the topology and the trajectory of the file that is being processed, + lig_name: str - Name of the ligand in the complex that will be analyzed, + special_ligand: str - Name of the special ligand that will be analysed, + peptide: str - Chainid of the peptide that will be analyzed) + + :returns: Tuple containing the frame index and the result from the process_frame function. + :rtype: tuple + +.. py:function:: process_trajectory(pdb_md, dataframe, num_processes, lig_name, special_ligand, peptide) + + Process protein-ligand trajectory with multiple CPUs in parallel. + + :param mdanalysis.universe pdb_md: MDAnalysis universe object containing the trajectory. + :param str dataframe: Name of a CSV file as str, where the interaction data will be read from if not None. 
+ :param int num_processes: Number of processes to be used for the parallelization. + :param str lig_name: Name of the ligand in the topology. + :param str special_ligand: Name of the special ligand in the topology. + :param str peptide: Name of the peptide chainid in the original topology. + + :returns: A DataFrame containing all the protein-ligand interaction data from the whole trajectory. + :rtype: pandas.dataframe + +.. py:function:: fill_missing_frames(df, md_len) + + Fills the frames with no interactions in the DataFrame with placeholder values. + + :param pandas.dataframe df: The input DataFrame with frames that have no Interactions + :param int md_len: The value that indicates the number of frames, thus allowing the function to loop through the DataFrame + + :returns: DataFrame with placeholder values in the frames with no interactions. + :rtype: pandas.dataframe + +openmmdl_analysis.markov_state_figure_generation +------------------------------------------------- + +.. py:function:: min_transition_calculation(min_transition) + + Calculates a list based on the minimum transition time provided values and returns it in factors 1, 2, 5, 10. + + :param int min_transition: The minimum transition time input for the generation of the factors. + + :returns: List with the minimum transition time with factors 1, 2, 5, 10. + :rtype: list + +.. py:function:: binding_site_markov_network(total_frames, min_transitions, combined_dict, font_size=12, size_node=200) + + Generate Markov Chain plots based on transition probabilities. + + :param int total_frames: The number of frames in the protein-ligand MD simulation. + :param list min_transitions: List of transition thresholds in %. A Markov Chain plot will be generated for each of the thresholds. + :param dict combined_dict: A dictionary with the information of the Binding Modes and their order of appearance during the simulation for all frames. + :param int font_size: The font size for the node labels. 
The default value is set to 12. + :param int size_node: The size of the nodes in the Markov Chain plot. The default value is set to 200. + + :returns: None + :rtype: None + +openmmdl_analysis.pml_writer +----------------------------- + +.. py:function:: generate_pharmacophore_centers(df, interactions) + + Generates pharmacophore points for interactions that are points such as hydrophobic and ionic interactions + + :param pandas.dataframe df: Dataframe containing all interactions from plip analysis (typically df_all) + :param list interactions: list of strings containing the interactions to generate pharmacophore points for + + :returns: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value + :rtype: dict + +.. py:function:: generate_pharmacophore_vectors(df, interactions) + + Generates pharmacophore points for interactions that are vectors such as hydrogen bond donors or acceptors + + :param pandas.dataframe df: Dataframe containing all interactions from plip analysis (typically df_all) + :param list interactions: list of strings containing the interactions to generate pharmacophore points for + + :returns: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value (first coords are ligand side, second are protein side) + :rtype: dict + +.. py:function:: generate_md_pharmacophore_cloudcenters(df, core_compound, output_filename, sysname, id_num=0) + + Generates pharmacophore from all interactions formed in the MD simulation. + A feature is generated for each interaction at the center of all its occurrences. + + :param pandas.dataframe df: Dataframe containing all interactions from plip analysis (typically df_all) + :param str core_compound: Name of the ligand. + :param str output_filename: Name of the output file. + :param str sysname: Name of the system. + :param int id_num: Number of the system. Defaults to 0. + + :returns: None + :rtype: None + +.. 
py:function:: generate_bindingmode_pharmacophore(dict_bindingmode, core_compound, sysname, outname, id_num=0) + + Generates pharmacophore from a binding mode and writes it to a .pml file + + :param dict dict_bindingmode: Dictionary containing all interactions of the binding mode and their corresponding ligand and protein coordinates. + :param str core_compound: Name of the ligand. + :param str sysname: Name of the system. + :param str outname: Name of the output file. + :param int id_num: Number of the system. Defaults to 0. + + :returns: None + :rtype: None + +.. py:function:: generate_pharmacophore_centers_all_points(df, interactions) + + Generates pharmacophore points for all interactions to generate point cloud. + + :param pandas.dataframe df: Dataframe containing all interactions from plip analysis (typically df_all) + :param list interactions: list of strings containing the interactions to generate pharmacophore points for. + + :returns: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value + :rtype: dict + +.. py:function:: generate_point_cloud_pml(cloud_dict, sysname, outname) + + Generates pharmacophore point cloud and writes it to a .pml file + + :param dict cloud_dict: Dictionary containing all interactions of the trajectory and their corresponding ligand coordinates. + :param str sysname: Name of the system. + :param str outname: Name of the output file. + + :returns: None + :rtype: None + +openmmdl_analysis.preprocessing +-------------------------------- + +.. py:function:: increase_ring_indices(ring, lig_index) + + Increases the atom indices in a ring of the ligand obtained from the ligand to fit the atom indices present in the protein-ligand complex. + + :param str ring: A list of atom indices belonging to a ring that need to be modified. 
+ :param int lig_index: An integer that is the first number of the ligand atom indices obtained from the protein-ligand, which is used to modify the ring indices + + :returns: A new list with modified atom indices. + :rtype: list + +.. py:function:: convert_ligand_to_smiles(input_sdf, output_smi) + + Converts ligand structures from an SDF file to SMILES format + + :param str input_sdf: Path to the input SDF file. + :param str output_smi: Path to the output SMILES file. + + :returns: None + :rtype: None + +.. py:function:: process_pdb_file(input_pdb_filename) + + Process a PDB file to make it compatible with the openmmdl_analysis package. + + :param str input_pdb_filename: Path to the input PDB file. + + :returns: None + :rtype: None + +.. py:function:: extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_resname) + + Extract and save the ligand from the receptor ligand complex PDB file into a new SDF file by itself. + + :param str input_pdb_filename: Path to the input PDB file. + :param str output_filename: Path to the output SDF file. + :param str target_resname: Name of the ligand in the target PDB file. + + :returns: None + :rtype: None + +.. py:function:: renumber_atoms_in_residues(input_pdb_file, output_pdb_file, lig_name) + + Renumber the atoms of the ligand in the topology PDB file. + + :param str input_pdb_file: Path to the input PDB file. + :param str output_pdb_file: Path to the output PDB file. + :param str lig_name: Name of the ligand in the input PDB file. + + :returns: None + :rtype: None + +.. py:function:: replace_atom_type(data) + + Replace wrong ligand atom types in the topology PDB file. + + :param str data: Text of the initial PDB file. + + :returns: Edited text of the PDB file. + :rtype: str + +.. py:function:: process_pdb(input_file, output_file) + + Wrapper function to process a PDB file. + + :param str input_file: Path to the input PDB file. + :param str output_file: Path to the output PDB file. 
+ + :returns: None :rtype: None + +.. py:function:: move_hydrogens_to_end(structure, target_residue_name) + + Moves hydrogens to the last lines of the residue in the PDB file. + + :param biopython.structure structure: Biopython PDB Structure object. + :param str target_residue_name: Name of the target residue in the input PDB file. + + :returns: None + :rtype: None + +openmmdl_analysis.rdkit_figure_generation +------------------------------------------ + +.. py:function:: generate_ligand_image(ligand_name, complex_pdb_file, ligand_no_h_pdb_file, smiles_file, output_png_filename) + + Generates a PNG image of the ligand. + + :param str ligand_name: Name of the ligand in the topology. + :param str complex_pdb_file: Path to the PDB file of the protein-ligand complex. + :param str ligand_no_h_pdb_file: Path to the PDB file of the ligand without hydrogens. + :param str smiles_file: Path to the SMILES file of the ligand. + :param str output_png_filename: Path to the output PNG file. + + :returns: None + :rtype: None + +.. py:function:: split_interaction_data(data) + + Splits the Input which consists of the ResNr and ResType, Atom indices, interaction type in multiple parts. + + :param list data: A list of ResNr and ResType, Atom indices, interaction type that needs to be split. + + :returns: A new list of the interaction data that consists of three parts, being the protein_partner_name that represents the interacting protein residue, numeric codes, that represent the atom indices of the interacting atoms of the ligand and the interaction type. + :rtype: list + +.. py:function:: highlight_numbers(split_data, starting_idx) + + Extracts the data from the split_data output of the interactions and categorizes it to its respective list. + + :param list split_data: A list of interaction data items, where each item contains information about protein partner name, numeric codes and interaction type. 
+ :param list starting_idx: Starting index of the ligand atom indices used for identifying the correct atom to highlight. + + :returns: A tuple that contains lists of all of the highlighted atoms of all of the interactions. + - highlighted_hbond_donor (list of int): Atom indices for hydrogen bond donors. + - highlighted_hbond_acceptor (list of int): Atom indices for hydrogen bond acceptors. + - highlighted_hbond_both (list of int): Atom indices for interactions that are both donors and acceptors. + - highlighted_hydrophobic (list of int): Atom indices for hydrophobic interactions. + - highlighted_waterbridge (list of int): Atom indices for water-bridge interactions. + - highlighted_pistacking (list of int): Atom indices for pi-stacking interactions. + - highlighted_halogen (list of int): Atom indices for halogen interactions. + - highlighted_ni (list of int): Atom indices for negative ionizable salt bridge interactions. + - highlighted_pi (list of int): Atom indices for positive ionizable salt bridge interactions. + - highlighted_pication (list of int): Atom indices for pi-cation interactions. + - highlighted_metal (list of int): Atom indices for metal interactions. + :rtype: tuple + +.. py:function:: generate_interaction_dict(interaction_type, keys) + + Generates a dictionary of interaction RGB color model based on the provided interaction type. + + :param str interaction_type: The type of the interaction, for example 'hydrophobic'. + :param list keys: List of the highlighted atoms that display an interaction. + + :returns: A dictionary in which the interaction types are associated with their respective RGB color codes. + :rtype: dict + +.. py:function:: update_dict(target_dict, *source_dicts) + + Updates the dictionary with the keys and values from other dictionaries. + + :param dict target_dict: The dictionary that needs to be updated with new keys and values. 
+ :param dict source_dicts: One or multiple dictionaries that are used to update the target dictionary with new keys and values. + + :returns: None + :rtype: None + +.. py:function:: create_and_merge_images(binding_mode, occurrence_percent, split_data, merged_image_paths) + + Create and merge images to generate a legend for binding modes. + + :param str binding_mode: Name of the binding mode. + :param float occurrence_percent: Percentage of the binding mode occurrence. + :param list split_data: Data of the interactions used to generate the legend. + :param list merged_image_paths: A list with the paths to the rdkit figures. + + :returns: Paths to the merged images. + :rtype: list + +.. py:function:: arranged_figure_generation(merged_image_paths, output_path) + + Generate an arranged figure by arranging merged images in rows and columns. + + :param list merged_image_paths: Paths of the merged images with the rdkit figure and legend. + :param dict output_path: The paths where the arranged output should be saved. + + :returns: None + :rtype: None + +openmmdl_analysis.rmsd_calculation +----------------------------------- + +.. py:function:: rmsd_for_atomgroups(prot_lig_top_file, prot_lig_traj_file, selection1, selection2=None) + + Calculate the RMSD for selected atom groups, and save the csv file and plot. + + :param str prot_lig_top_file: Path to the topology file. + :param str prot_lig_traj_file: Path to the trajectory file. + :param str selection1: Selection string for main atom group, also used during alignment. + :param list selection2: Selection strings for additional atom groups. Defaults to None. + + :returns: DataFrame containing RMSD of the selected atom groups over time. + :rtype: pandas.dataframe + +.. py:function:: RMSD_dist_frames(prot_lig_top_file, prot_lig_traj_file, lig, nucleic=False) + + Calculate the RMSD between all frames in a matrix. + + :param str prot_lig_top_file: Path to the topology file. 
+ :param str prot_lig_traj_file: Path to the trajectory file. + :param str lig: Name of the ligand in the topology. + :param bool nucleic: Boolean to indicate if the receptor is a nucleic acid. Defaults to False. + + :returns: pairwise_rmsd_prot. Numpy array of RMSD values for pairwise protein structures. + :rtype: numpy.array + :returns: pairwise_rmsd_lig. Numpy array of RMSD values for ligand structures. + :rtype: numpy.array + +openmmdl_analysis.visualization_functions +------------------------------------------ + +.. py:function:: interacting_water_ids(df_all, waterbridge_interactions) + + Generates a list of all water ids that form water bridge interactions. + + :param pandas.dataframe df_all: Dataframe containing all interactions from plip analysis (typically df_all) + :param list waterbridge_interactions: list of strings containing waterbridge interactions + + :returns: list of all unique water ids that form water bridge interactions + :rtype: list + +.. py:function:: save_interacting_waters_trajectory(pdb_file_path, dcd_file_path, interacting_waters, ligname, special, outputpath="./Visualization/",) + + Saves .pdb and .dcd files of the trajectory containing ligand, receptor and all interacting waters. + + :param str pdb_file_path: Path to the original PDB file. + :param str dcd_file_path: Path to the original DCD file. + :param list interacting_waters: List of all interacting water ids + :param str ligname: Name of the ligand in the topology. + :param str special: Name of the special ligand in the topology. + :param str outputpath: Path to the output directory. Optional, defaults to "./Visualization/" + + :returns: None + :rtype: None + +.. py:function:: cloud_json_generation(df_all) + + Generates dict for visualization of interaction clouds. Later saved as .json file. + + :param pandas.dataframe df_all: Dataframe containing all interactions from plip analysis (typically df_all) + + :returns: Dict containing all interaction clouds + :rtype: dict + +.. 
py:function:: visualization(ligname, receptor_type="protein or nucleic", height="1000px", width="1000px") + + Generates visualization of the trajectory with the interacting waters and interaction clouds. + + :param str ligname: Name of the ligand in the topology. + :param str receptor_type: Type of the receptor. Defaults to "protein or nucleic". + :param str height: Height of the visualization. Defaults to "1000px". + :param str width: Width of the visualization. Defaults to "1000px". + + :returns: Returns an nglview.widget object containing the visualization + :rtype: nglview.widget + +.. py:function:: run_visualization() + + Runs the visualization notebook in the current directory. The visualization notebook is copied from the package directory to the current directory and automatically started. + + :returns: None + :rtype: None \ No newline at end of file diff --git a/openmmdl/_version.py b/openmmdl/_version.py index 94c5c941..23261bef 100644 --- a/openmmdl/_version.py +++ b/openmmdl/_version.py @@ -1 +1 @@ -__version__ = "1.0.0+648.gd517f79.dirty" +__version__ = "1.0.0+677.ge4e8207.dirty" diff --git a/openmmdl/openmmdl_analysis/barcode_generation.py b/openmmdl/openmmdl_analysis/barcode_generation.py index 1662645e..52fb4dfe 100644 --- a/openmmdl/openmmdl_analysis/barcode_generation.py +++ b/openmmdl/openmmdl_analysis/barcode_generation.py @@ -185,7 +185,7 @@ def plot_waterbridge_piechart(df_all, waterbridge_barcodes, waterbridge_interact ) -def plot_bacodes_grouped(interactions, df_all, interaction_type, peptide=False): +def plot_bacodes_grouped(interactions, df_all, interaction_type): """generates barcode figures and groups them by ligandatom, aswell as total interaction barcode for a giveen lingenatom. 
Args: diff --git a/openmmdl/openmmdl_analysis/binding_mode_processing.py b/openmmdl/openmmdl_analysis/binding_mode_processing.py index 13f8df81..8fb292c4 100644 --- a/openmmdl/openmmdl_analysis/binding_mode_processing.py +++ b/openmmdl/openmmdl_analysis/binding_mode_processing.py @@ -271,7 +271,7 @@ def filtering_values(threshold, frames, df, unique_columns_rings_grouped): unique_columns_rings_grouped (dict): Dictionary containing the grouped and unique values otained from gather_interactions. Returns: - dict: A dictionary with a single key named 'all' that contains a list of all combined values from all the sub-dictionaries. + list: A list of values, with unique values and their corresponding occurrence counts. """ # Call the function to remove duplicate keys unique_data = remove_duplicate_values(unique_columns_rings_grouped) diff --git a/openmmdl/openmmdl_analysis/find_stable_waters.py b/openmmdl/openmmdl_analysis/find_stable_waters.py index 29e9c23e..f86652b1 100644 --- a/openmmdl/openmmdl_analysis/find_stable_waters.py +++ b/openmmdl/openmmdl_analysis/find_stable_waters.py @@ -11,8 +11,8 @@ def trace_waters(topology, trajectory, output_directory): """trace the water molecules in a trajectory and write all which move below one Angstrom distance. To adjust the distance alter the integer Args: - topology (pdb_file_name): Path to the topology file. - trajectory (dcd_file_name): Path to the trajectory file. + topology (str): Path to the topology file. + trajectory (str): Path to the trajectory file. output_directory (str): Directory where output files will be saved. Returns: @@ -96,14 +96,13 @@ def perform_clustering_and_writing( stable_waters, cluster_eps, total_frames, output_directory ): """ + Perform DBSCAN clustering on the stable water coordinates, and write the clusters and their representatives to PDB files. + Args: stable_waters (pd.DataFrame): DataFrame containing stable water coordinates. cluster_eps (float): DBSCAN clustering epsilon parameter. 
This is in Angstrom in this case, and defines which Water distances should be within one cluster total_frames (int): Total number of frames. output_directory (str): Directory where output files will be saved. - - Returns: - None, it writes files. """ # Feature extraction: XYZ coordinates X = stable_waters[["Oxygen_X", "Oxygen_Y", "Oxygen_Z"]] @@ -136,13 +135,13 @@ def write_pdb_clusters_and_representatives( clustered_waters, min_samples, output_sub_directory ): """ + Writes the clusters and their representatives to PDB files. + Args: clustered_waters (pd.DataFrame): DataFrame containing clustered water coordinates. min_samples (int): Minimum number of samples for DBSCAN clustering. output_sub_directory (str): Subdirectory where output PDB files will be saved. - Returns: - None, it will output PDB files. """ atom_counter = 1 pdb_file_counter = 1 @@ -187,13 +186,11 @@ def stable_waters_pipeline( ): """Function to run the pipeline to extract stable water clusters, and their representatives from a PDB & DCD file Args: - topology (PDB_file_name): Path to the topology file. - trajectory (DCD_file_name): Path to the trajectory file. + topology (str): Path to the topology file. + trajectory (str): Path to the trajectory file. water_eps (float): DBSCAN clustering epsilon parameter. output_directory (str, optional): Directory where output files will be saved. Default is "./stableWaters". - Returns: - None, it starts the pipeline which will create output files. """ # Load the PDB and DCD files output_directory += "_clusterEps_" @@ -211,10 +208,10 @@ def stable_waters_pipeline( def filter_and_parse_pdb(protein_pdb): """This function reads in a PDB and returns the structure with bioparser. Args: - protein_pdb (PDB_file_path): Path to a protein PDB file. + protein_pdb (str): Path to a protein PDB file. Returns: - Structure: PDB structure object. + biopython.structure: PDB structure object. 
""" with open(protein_pdb, "r") as pdb_file: lines = [ @@ -243,8 +240,8 @@ def filter_and_parse_pdb(protein_pdb): def find_interacting_residues(structure, representative_waters, distance_threshold): """This function maps waters (e.g. the representative waters) to interacting residues of a different PDB structure input. Use "filter_and_parse_pdb" to get the input for this function Args: - structure (Structure): PDB structure object. - representative_waters (pd.DataFrame): DataFrame containing representative water coordinates. + structure (biopython.structure): Biopython PDB structure object. + representative_waters (pandas.DataFrame): DataFrame containing representative water coordinates. distance_threshold (float): Threshold distance for identifying interacting residues. Returns: @@ -289,7 +286,7 @@ def read_pdb_as_dataframe(pdb_file): pdb_file (str): Path to the PDB file. Returns: - pd.DataFrame: DataFrame containing PDB data. + pandas.DataFrame: DataFrame containing PDB data. """ lines = [] with open(pdb_file, "r") as f: @@ -326,9 +323,6 @@ def analyze_protein_and_water_interaction( cluster_eps (float): DBSCAN clustering epsilon parameter. output_directory (str, optional): Directory where output files will be saved. Default is "./stableWaters". distance_threshold (float, optional): Threshold distance for identifying interacting residues. Default is 5.0 (Angstrom). - - Returns: - None, it will write a csv file. """ output_directory += "_clusterEps_" strEps = str(cluster_eps).replace(".", "") diff --git a/openmmdl/openmmdl_analysis/interaction_gathering.py b/openmmdl/openmmdl_analysis/interaction_gathering.py index 51e6cd64..1a6095c9 100644 --- a/openmmdl/openmmdl_analysis/interaction_gathering.py +++ b/openmmdl/openmmdl_analysis/interaction_gathering.py @@ -164,7 +164,7 @@ def change_lig_to_residue(file_path, old_residue_name, new_residue_name): Args: file_path (str): Filepath of the topology file. old_residue_name (str): Residue name of the ligand. 
- new_residue_name (str): New residue name of the ligand now changed to mimic a residue. + new_residue_name (str): New residue name of the ligand now changed to mimic an amino acid residue. """ with open(file_path, "r") as file: lines = file.readlines() @@ -371,7 +371,7 @@ def process_trajectory( Args: pdb_md (mda universe): MDAnalysis Universe class representation of the topology and the trajectory of the file that is being processed. - dataframe (pandas dataframe): Name of a CSV file as str, where the interaction data will be read from if not None. + dataframe (str): Name of a CSV file as str, where the interaction data will be read from if not None. num_processes (int): The number of CPUs that will be used for the processing of the protein-ligand trajectory. Defaults to half of the CPUs in the system. lig_name (str): Name of the Ligand in the complex that will be analyzed. special_ligand (str): Name of the special ligand in the complex that will be analyzed. diff --git a/openmmdl/openmmdl_analysis/markov_state_figure_generation.py b/openmmdl/openmmdl_analysis/markov_state_figure_generation.py index bbe5e8e0..ec8856ca 100644 --- a/openmmdl/openmmdl_analysis/markov_state_figure_generation.py +++ b/openmmdl/openmmdl_analysis/markov_state_figure_generation.py @@ -24,7 +24,7 @@ def min_transition_calculation(min_transition): def binding_site_markov_network( - total_frames, min_transitions, combined_dict, font_size=None, size_node=None + total_frames, min_transitions, combined_dict, font_size=12, size_node=200 ): """Generate Markov Chain plots based on transition probabilities. 
diff --git a/openmmdl/openmmdl_analysis/openmmdlanalysis.py b/openmmdl/openmmdl_analysis/openmmdlanalysis.py index 7de01c6c..6e0e820a 100644 --- a/openmmdl/openmmdl_analysis/openmmdlanalysis.py +++ b/openmmdl/openmmdl_analysis/openmmdlanalysis.py @@ -2,6 +2,7 @@ openmmdl_simulation.py Perform Simulations of Protein-ligand complexes with OpenMM """ + import argparse import sys import warnings @@ -457,9 +458,9 @@ def main(): # Check if the fingerprint has been encountered before if fingerprint in treshold_fingerprint_dict: - grouped_frames_treshold.at[ - index, "Binding_fingerprint_treshold" - ] = treshold_fingerprint_dict[fingerprint] + grouped_frames_treshold.at[index, "Binding_fingerprint_treshold"] = ( + treshold_fingerprint_dict[fingerprint] + ) else: # Assign a new label if the fingerprint is new label = f"Binding_Mode_{label_counter}" diff --git a/openmmdl/openmmdl_analysis/pml_writer.py b/openmmdl/openmmdl_analysis/pml_writer.py index e157285c..11762609 100644 --- a/openmmdl/openmmdl_analysis/pml_writer.py +++ b/openmmdl/openmmdl_analysis/pml_writer.py @@ -12,7 +12,7 @@ def generate_pharmacophore_centers(df, interactions): interactions (list): list of interactions to generate pharmacophore from Returns: - dict: interaction from wicht pharmacophore is generated as key and list of coordinates as value + dict: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value """ coord_pattern = re.compile(r"\(([\d.-]+), ([\d.-]+), ([\d.-]+)\)") pharmacophore = {} @@ -44,7 +44,7 @@ def generate_pharmacophore_vectors(df, interactions): interactions (list): list of interactions to generate pharmacophore from Returns: - dict: interaction from wicht pharmacophore is generated as key and list of coordinates as value (first coords are ligand side, second are protein side) + dict: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value (first coords are ligand side, second are protein side) """ 
coord_pattern = re.compile(r"\(([\d.-]+), ([\d.-]+), ([\d.-]+)\)") pharmacophore = {} @@ -409,7 +409,7 @@ def generate_pharmacophore_centers_all_points(df, interactions): interactions (list): list of interactions to generate pharmacophore from Returns: - dict: interaction from which pharmacophore is generated as key and list of coordinates as value + dict: Dict of interactions from which pharmacophore is generated as key and list of coordinates as value """ coord_pattern = re.compile(r"\(([\d.-]+), ([\d.-]+), ([\d.-]+)\)") pharmacophore = {} diff --git a/openmmdl/openmmdl_analysis/preprocessing.py b/openmmdl/openmmdl_analysis/preprocessing.py index f71026e4..ad468991 100644 --- a/openmmdl/openmmdl_analysis/preprocessing.py +++ b/openmmdl/openmmdl_analysis/preprocessing.py @@ -15,7 +15,7 @@ def increase_ring_indices(ring, lig_index): Args: ring (str): A list of atom indices belonging to a ring that need to be modified. - lig_index (str): An integer that is the first number of the ligand atom indices obtained from the protein-ligand, which is used to modify the ring indices + lig_index (int): An integer that is the first number of the ligand atom indices obtained from the protein-ligand, which is used to modify the ring indices Returns: list: A new list with modified atom indicies. @@ -68,11 +68,11 @@ def process_pdb_file(input_pdb_filename): def extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_resname): - """Extract and save the ligand from the receptor ligand complex PDB file into a new PDB file by itself . + """Extract and save the ligand from the receptor ligand complex PDB file into a new PDB file by itself. 
Args: input_pdb_filename (str): name of the input PDB file - output_pdb_filename (str): name of the output PDB file + output_filename (str): name of the output SDF file target_resname (str): resname of the ligand in the original PDB file """ # Load the PDB file using MDAnalysis @@ -98,30 +98,13 @@ def extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_r os.remove("lig.pdb") -def convert_pdb_to_sdf(input_pdb_filename, output_sdf_filename): - """Convert ligand PDB file to SDF file for analysis using Open Babel. - - Args: - input_pdb_filename (str): name of the input PDB file - output_sdf_filename (str): name of the output SDF file - """ - # Use subprocess to call Open Babel for the file format conversion - try: - subprocess.run( - ["obabel", input_pdb_filename, "-O", output_sdf_filename], check=True - ) - except subprocess.CalledProcessError as e: - print(f"Error converting PDB to SDF: {e}") - return - - def renumber_atoms_in_residues(input_pdb_file, output_pdb_file, lig_name): """Renumer the atoms of the ligand in the topology PDB file. Args: input_pdb_file (str): Path to the initial PDB file. output_pdb_file (str): Path to the output PDB file. - lig_name (str): Name of the ligand in the PDB file. + lig_name (str): Name of the ligand in the input PDB file. """ # Read the input PDB file with open(input_pdb_file, "r") as f: @@ -202,7 +185,7 @@ def move_hydrogens_to_end(structure, target_residue_name): """Moves hydrogens to the last lines of theresidue in the PDB file. Args: - structure (Biopython structure): Structure object containing the PDB file. + structure (Bio.structure): Structure object containing the PDB file. target_residue_name (str): Name of the residue in the PDB file. 
""" # Counter for atom numbering within each residue diff --git a/openmmdl/openmmdl_analysis/rdkit_figure_generation.py b/openmmdl/openmmdl_analysis/rdkit_figure_generation.py index dd2e5d3d..859a211a 100644 --- a/openmmdl/openmmdl_analysis/rdkit_figure_generation.py +++ b/openmmdl/openmmdl_analysis/rdkit_figure_generation.py @@ -347,7 +347,7 @@ def create_and_merge_images( merged_image_paths (list): A list with the paths to the rdkit figures. Returns: - _type_: _description_ + list: Paths to the merged images. """ # Create the main figure and axis fig = pylab.figure() @@ -440,8 +440,8 @@ def arranged_figure_generation(merged_image_paths, output_path): """Generate an arranged figure by arranging merged images in rows and columns. Args: - merged_image_paths (str): Paths of the merged images with the rdkit figure and legend. - output_path (dict): The path where the arranged output should be saved. + merged_image_paths (list): Paths of the merged images with the rdkit figure and legend. + output_path (dict): The paths where the arranged output should be saved. """ # Open the list of images merged_images = [Image.open(path) for path in merged_image_paths] diff --git a/openmmdl/openmmdl_analysis/visualization_functions.py b/openmmdl/openmmdl_analysis/visualization_functions.py index 663a38c3..327f0459 100644 --- a/openmmdl/openmmdl_analysis/visualization_functions.py +++ b/openmmdl/openmmdl_analysis/visualization_functions.py @@ -197,7 +197,7 @@ def visualization( width (str, optional): width of the visualization. Defaults to '1000px'. Returns: - nglview widget: returns the nglview widget containing the visualization + nglview widget: returns an nglview.widget object containing the visualization """ with open("clouds.json") as f: data = json.load(f) @@ -254,7 +254,7 @@ def visualization( def run_visualization(): - """Runs the visualization notebook in the current directory. 
The notebook is copied from the package directory to the current directory.""" + """Runs the visualization notebook in the current directory. The visualization notebook is copied from the package directory to the current directory and automatically started.""" package_dir = os.path.dirname(__file__) notebook_path = os.path.join(package_dir, "visualization.ipynb") current_dir = os.getcwd() diff --git a/openmmdl/openmmdl_setup/__init__.py b/openmmdl/openmmdl_setup/__init__.py index 139597f9..8b137891 100644 --- a/openmmdl/openmmdl_setup/__init__.py +++ b/openmmdl/openmmdl_setup/__init__.py @@ -1,2 +1 @@ - diff --git a/openmmdl/openmmdl_setup/openmmdlsetup.py b/openmmdl/openmmdl_setup/openmmdlsetup.py index 462dbcb6..248ed4c6 100644 --- a/openmmdl/openmmdl_setup/openmmdlsetup.py +++ b/openmmdl/openmmdl_setup/openmmdlsetup.py @@ -1,8 +1,23 @@ import openmm as mm import openmm.unit as unit from openmm.app import PDBFile, PDBxFile -from pdbfixer.pdbfixer import PDBFixer, proteinResidues, dnaResidues, rnaResidues, _guessFileFormat -from flask import Flask, request, session, g, render_template, make_response, send_file, url_for +from pdbfixer.pdbfixer import ( + PDBFixer, + proteinResidues, + dnaResidues, + rnaResidues, + _guessFileFormat, +) +from flask import ( + Flask, + request, + session, + g, + render_template, + make_response, + send_file, + url_for, +) from werkzeug.utils import secure_filename from multiprocessing import Process, Pipe import datetime @@ -18,7 +33,7 @@ import zipfile -if sys.version_info >= (3,0): +if sys.version_info >= (3, 0): from io import StringIO else: from cStringIO import StringIO @@ -26,14 +41,15 @@ app = Flask(__name__) app.config.from_object(__name__) -app.config.update({'SECRET_KEY':'development key'}) -app.jinja_env.globals['mm'] = mm +app.config.update({"SECRET_KEY": "development key"}) +app.jinja_env.globals["mm"] = mm uploadedFiles = {} fixer = None scriptOutput = None simulationProcess = None + def saveUploadedFiles(): 
uploadedFiles.clear() for key in request.files: @@ -44,202 +60,238 @@ def saveUploadedFiles(): filelist.append((temp, secure_filename(file.filename))) uploadedFiles[key] = filelist -@app.route('/headerControls') + +@app.route("/headerControls") def headerControls(): - if 'startOver' in request.args: + if "startOver" in request.args: return showSelectFileType() - if 'quit' in request.args: - func = request.environ.get('werkzeug.server.shutdown') + if "quit" in request.args: + func = request.environ.get("werkzeug.server.shutdown") if func is None: - raise RuntimeError('Not running with the Werkzeug Server') + raise RuntimeError("Not running with the Werkzeug Server") func() return "OpenMM Setup has stopped running. You can close this window." -@app.route('/') + +@app.route("/") def showSelectFileType(): - return render_template('selectFileType.html') + return render_template("selectFileType.html") -@app.route('/selectFiles') + +@app.route("/selectFiles") def selectFiles(): - session['fileType'] = request.args.get('type', '') # get the value of `type` from the url + session["fileType"] = request.args.get( + "type", "" + ) # get the value of `type` from the url return showConfigureFiles() + def showConfigureFiles(): try: - fileType = session['fileType'] - if fileType == 'pdb': - return render_template('configurePdbFile.html') - elif fileType == 'amber': - return render_template('configureAmberFiles.html') + fileType = session["fileType"] + if fileType == "pdb": + return render_template("configurePdbFile.html") + elif fileType == "amber": + return render_template("configureAmberFiles.html") except: - app.logger.error('Error displaying configure files page', exc_info=True) + app.logger.error("Error displaying configure files page", exc_info=True) # The file type is invalid, so send them back to the select file type page. 
return showSelectFileType() + ################################################################################################# -@app.route('/configureFiles', methods=['POST']) +@app.route("/configureFiles", methods=["POST"]) def configureFiles(): - fileType = session['fileType'] - if fileType == 'pdb': - if 'file' not in request.files or request.files['file'].filename == '': + fileType = session["fileType"] + if fileType == "pdb": + if "file" not in request.files or request.files["file"].filename == "": # They didn't select a file. Send them back. return showConfigureFiles() saveUploadedFiles() - session['forcefield'] = request.form.get('forcefield', '') - session['ml_forcefield'] = request.form.get('ml_forcefield', '') - session['waterModel'] = request.form.get('waterModel', '') - session['ligandMinimization'] = request.form.get('ligandMinimization', '') - session['ligandSanitization'] = request.form.get('ligandSanitization', '') - session['sdfFile'] = uploadedFiles['sdfFile'][0][1] + session["forcefield"] = request.form.get("forcefield", "") + session["ml_forcefield"] = request.form.get("ml_forcefield", "") + session["waterModel"] = request.form.get("waterModel", "") + session["ligandMinimization"] = request.form.get("ligandMinimization", "") + session["ligandSanitization"] = request.form.get("ligandSanitization", "") + session["sdfFile"] = uploadedFiles["sdfFile"][0][1] configureDefaultOptions() - file, name = uploadedFiles['file'][0] + file, name = uploadedFiles["file"][0] file.seek(0, 0) - session['pdbType'] = _guessFileFormat(file, name) - if session['pdbType'] == 'pdb': + session["pdbType"] = _guessFileFormat(file, name) + if session["pdbType"] == "pdb": global fixer fixer = PDBFixer(pdbfile=file) return showSelectChains() - elif fileType == 'amber': - session['has_files'] = request.form.get('has_files', '') - has_files = session['has_files'] + elif fileType == "amber": + session["has_files"] = request.form.get("has_files", "") + has_files = 
session["has_files"] if has_files == "yes": - if 'prmtopFile' not in request.files or request.files['prmtopFile'].filename == '' or 'inpcrdFile' not in request.files or request.files['inpcrdFile'].filename == '': + if ( + "prmtopFile" not in request.files + or request.files["prmtopFile"].filename == "" + or "inpcrdFile" not in request.files + or request.files["inpcrdFile"].filename == "" + ): # if the user doesn't select prmtop or incprd file. Send them back. - return showConfigureFiles() - session['nmLig'] = 'nmLig' in request.form - session['spLig'] = 'spLig' in request.form - if session['nmLig']: + return showConfigureFiles() + session["nmLig"] = "nmLig" in request.form + session["spLig"] = "spLig" in request.form + if session["nmLig"]: # If the user doesn't type the resname of the ligand. Send them back. - if 'nmLigName' not in request.form or request.form['nmLigName'] == '': + if "nmLigName" not in request.form or request.form["nmLigName"] == "": return showConfigureFiles() else: - session['nmLigName'] = request.form.get('nmLigName', '') - if session['spLig']: - if 'spLigName' not in request.form or request.form['spLigName'] == '': + session["nmLigName"] = request.form.get("nmLigName", "") + if session["spLig"]: + if "spLigName" not in request.form or request.form["spLigName"] == "": return showConfigureFiles() else: - session['spLigName'] = request.form.get('spLigName', '') - saveUploadedFiles() - elif has_files == "no": + session["spLigName"] = request.form.get("spLigName", "") + saveUploadedFiles() + elif has_files == "no": configureDefaultAmberOptions() return showAmberOptions() - + configureDefaultOptions() return showSimulationOptions() -@app.route('/showAmberOptions') + +@app.route("/showAmberOptions") def showAmberOptions(): - return render_template('AmberOptions.html') + return render_template("AmberOptions.html") -@app.route('/setAmberOptions', methods=['POST']) + +@app.route("/setAmberOptions", methods=["POST"]) def setAmberOptions(): for key in 
request.form: session[key] = request.form[key] ######## Receptor ######## - session['rcpType'] = request.form.get('rcpType', '') # store the value of rcpType in session, e.g. protRcp, dnaRcp, rnaRcp, carboRcp - session['prot_ff'] = request.form.get('prot_ff', '') - session['other_prot_ff_input'] = request.form.get('other_prot_ff_input', '') - session['dna_ff'] = request.form.get('dna_ff', '') - session['other_dna_ff_input'] = request.form.get('other_dna_ff_input', '') - session['rna_ff'] = request.form.get('rna_ff', '') - session['other_rna_ff_input'] = request.form.get('other_rna_ff_input', '') - session['carbo_ff'] = request.form.get('carbo_ff', '') - session['other_carbo_ff_input'] = request.form.get('other_carbo_ff_input', '') + session["rcpType"] = request.form.get( + "rcpType", "" + ) # store the value of rcpType in session, e.g. protRcp, dnaRcp, rnaRcp, carboRcp + session["prot_ff"] = request.form.get("prot_ff", "") + session["other_prot_ff_input"] = request.form.get("other_prot_ff_input", "") + session["dna_ff"] = request.form.get("dna_ff", "") + session["other_dna_ff_input"] = request.form.get("other_dna_ff_input", "") + session["rna_ff"] = request.form.get("rna_ff", "") + session["other_rna_ff_input"] = request.form.get("other_rna_ff_input", "") + session["carbo_ff"] = request.form.get("carbo_ff", "") + session["other_carbo_ff_input"] = request.form.get("other_carbo_ff_input", "") # save uploaded pdb file for receptor - rcpType = session['rcpType'] - if rcpType == 'protRcp': - if 'protFile' not in request.files or request.files['protFile'].filename == '': + rcpType = session["rcpType"] + if rcpType == "protRcp": + if "protFile" not in request.files or request.files["protFile"].filename == "": showAmberOptions() saveUploadedFiles() - elif rcpType == 'dnaRcp': - if 'dnaFile' not in request.files or request.files['dnaFile'].filename == '': + elif rcpType == "dnaRcp": + if "dnaFile" not in request.files or request.files["dnaFile"].filename == "": 
showAmberOptions() saveUploadedFiles() - elif rcpType == 'rnaRcp': - if 'rnaFile' not in request.files or request.files['rnaFile'].filename == '': + elif rcpType == "rnaRcp": + if "rnaFile" not in request.files or request.files["rnaFile"].filename == "": showAmberOptions() saveUploadedFiles() - elif rcpType == 'carboRcp': - if 'carboFile' not in request.files or request.files['carboFile'].filename == '': + elif rcpType == "carboRcp": + if ( + "carboFile" not in request.files + or request.files["carboFile"].filename == "" + ): showAmberOptions() saveUploadedFiles() - + ######## Ligand ######## - session['nmLig'] = 'nmLig' in request.form # store whether the nmLig checkbox is checked, e.g. True or False - session['spLig'] = 'spLig' in request.form + session["nmLig"] = ( + "nmLig" in request.form + ) # store whether the nmLig checkbox is checked, e.g. True or False + session["spLig"] = "spLig" in request.form # save uploaded pdb or sdf file for ligand ## for normal ligand - if session['nmLig']: - if 'nmLigFile' not in request.files or request.files['nmLigFile'].filename == '': + if session["nmLig"]: + if ( + "nmLigFile" not in request.files + or request.files["nmLigFile"].filename == "" + ): showAmberOptions() saveUploadedFiles() ## for special ligand - if session['spLig']: - if 'spLigFile' not in request.files or request.files['spLigFile'].filename == '' or 'prepcFile' not in request.files or request.files['prepcFile'].filename == '' or 'frcmodFile' not in request.files or request.files['frcmodFile'].filename == '': + if session["spLig"]: + if ( + "spLigFile" not in request.files + or request.files["spLigFile"].filename == "" + or "prepcFile" not in request.files + or request.files["prepcFile"].filename == "" + or "frcmodFile" not in request.files + or request.files["frcmodFile"].filename == "" + ): showAmberOptions() saveUploadedFiles() - + ######## Add Water/Membrane ######## - session['addType'] = request.form.get('addType', '') - session['boxType'] = 
request.form.get('boxType', '') - session['dist'] = request.form.get('dist', '') - session['lipid_tp'] = request.form.get('lipid_tp', '') - session['other_lipid_tp_input'] = request.form.get('other_lipid_tp_input', '') - session['lipid_ratio'] = request.form.get('lipid_ratio', '') - session['lipid_ff'] = request.form.get('lipid_ff', '') - session['dist2Border'] = request.form.get('dist2Border', '') - session['padDist'] = request.form.get('padDist', '') - session['water_ff'] = request.form.get('water_ff', '') - session['pos_ion'] = request.form.get('pos_ion', '') - session['neg_ion'] = request.form.get('neg_ion', '') - session['ionConc'] = request.form.get('ionConc', '') - + session["addType"] = request.form.get("addType", "") + session["boxType"] = request.form.get("boxType", "") + session["dist"] = request.form.get("dist", "") + session["lipid_tp"] = request.form.get("lipid_tp", "") + session["other_lipid_tp_input"] = request.form.get("other_lipid_tp_input", "") + session["lipid_ratio"] = request.form.get("lipid_ratio", "") + session["lipid_ff"] = request.form.get("lipid_ff", "") + session["dist2Border"] = request.form.get("dist2Border", "") + session["padDist"] = request.form.get("padDist", "") + session["water_ff"] = request.form.get("water_ff", "") + session["pos_ion"] = request.form.get("pos_ion", "") + session["neg_ion"] = request.form.get("neg_ion", "") + session["ionConc"] = request.form.get("ionConc", "") + return createAmberBashScript() -@app.route('/downloadAmberBashScript') +@app.route("/downloadAmberBashScript") def downloadAmberBashScript(): response = make_response(createAmberBashScript()) - response.headers['Content-Disposition'] = 'attachment; filename="run_ambertools.sh"' + response.headers["Content-Disposition"] = 'attachment; filename="run_ambertools.sh"' return response + def configureDefaultAmberOptions(): """Select default options based on the file format and force field.""" # Ligand - session['nmLig'] = '' - session['spLig'] = '' - 
session['lig_ff'] = 'gaff2' - session['charge_value'] = '0' - session['charge_method'] = 'bcc' - + session["nmLig"] = "" + session["spLig"] = "" + session["lig_ff"] = "gaff2" + session["charge_value"] = "0" + session["charge_method"] = "bcc" + # Receptor - session['prot_ff'] = 'ff19SB' - session['dna_ff'] = 'OL15' - session['rna_ff'] = 'OL3' - session['carbo_ff'] = 'GLYCAM_06j' - + session["prot_ff"] = "ff19SB" + session["dna_ff"] = "OL15" + session["rna_ff"] = "OL3" + session["carbo_ff"] = "GLYCAM_06j" + # AddWaterMembrane - session['addType'] = 'addWater' - session['boxType'] = 'cube' - session['dist'] ='10' - - session['lipid_tp'] = 'POPC' - session['other_lipid_tp_input'] = 'POPC:TOPC' - session['lipid_ratio'] = '1:1' - session['lipid_ff'] = 'lipid21' - session['dist2Border'] = '15' - session['padDist'] = '17' - - session['water_ff'] = 'opc' - session['pos_ion'] = 'Na+' - session['neg_ion'] = 'Cl-' - session['ionConc'] = '0.15' - + session["addType"] = "addWater" + session["boxType"] = "cube" + session["dist"] = "10" + + session["lipid_tp"] = "POPC" + session["other_lipid_tp_input"] = "POPC:TOPC" + session["lipid_ratio"] = "1:1" + session["lipid_ff"] = "lipid21" + session["dist2Border"] = "15" + session["padDist"] = "17" + + session["water_ff"] = "opc" + session["pos_ion"] = "Na+" + session["neg_ion"] = "Cl-" + session["ionConc"] = "0.15" + + def createAmberBashScript(): a_script = [] - a_script.append('# This script was generated by OpenMMDL-Setup on %s.\n' % datetime.date.today()) - a_script.append(''' + a_script.append( + "# This script was generated by OpenMMDL-Setup on %s.\n" % datetime.date.today() + ) + a_script.append( + """ # ,-----. .-------. .-''-. ,---. .--.,---. ,---.,---. ,---. ______ .---. # .' .-, '. \ _(`)_ \ .'_ _ \ | \ | || \ / || \ / || _ `''. 
| ,_| # / ,-.| \ _ \ | (_ o._)| / ( ` ) '| , \ | || , \/ , || , \/ , || _ | ) _ \,-./ ) @@ -251,303 +303,427 @@ def createAmberBashScript(): # '-----' `---' `'-..-' '--' '--''--' '--''--' '--''-----'` `--------` - ''') + """ + ) - a_script.append('#!/bin/bash\n') + a_script.append("#!/bin/bash\n") # Receptor - a_script.append('################################## Receptor ######################################') - rcpType = session['rcpType'] - if rcpType == 'protRcp': - protFile = uploadedFiles['protFile'][0][1] + a_script.append( + "################################## Receptor ######################################" + ) + rcpType = session["rcpType"] + if rcpType == "protRcp": + protFile = uploadedFiles["protFile"][0][1] protFile = protFile[:-4] - a_script.append('rcp_nm=%s # the file name of ligand without suffix `pdb`' % protFile ) - - prot_ff = session['prot_ff'] - if prot_ff != 'other_prot_ff': - a_script.append('rcp_ff=%s' % session['prot_ff']) - elif prot_ff == 'other_prot_ff': - a_script.append('rcp_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`' % session['other_prot_ff_input']) - a_script.append('\n') - - elif rcpType == 'dnaRcp': - dnaFile = uploadedFiles['dnaFile'][0][1] + a_script.append( + "rcp_nm=%s # the file name of ligand without suffix `pdb`" % protFile + ) + + prot_ff = session["prot_ff"] + if prot_ff != "other_prot_ff": + a_script.append("rcp_ff=%s" % session["prot_ff"]) + elif prot_ff == "other_prot_ff": + a_script.append( + "rcp_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`" + % session["other_prot_ff_input"] + ) + a_script.append("\n") + + elif rcpType == "dnaRcp": + dnaFile = uploadedFiles["dnaFile"][0][1] dnaFile = dnaFile[:-4] - a_script.append('rcp_nm=%s # the file name of ligand without suffix `pdb`' % dnaFile ) - - dna_ff = session['dna_ff'] - if dna_ff != 'other_dna_ff': - a_script.append('rcp_ff=%s' % session['dna_ff']) - elif dna_ff == 
'other_dna_ff': - a_script.append('rcp_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`' % session['other_dna_ff_input']) - a_script.append('\n') - - elif rcpType == 'rnaRcp': - rnaFile = uploadedFiles['rnaFile'][0][1] + a_script.append( + "rcp_nm=%s # the file name of ligand without suffix `pdb`" % dnaFile + ) + + dna_ff = session["dna_ff"] + if dna_ff != "other_dna_ff": + a_script.append("rcp_ff=%s" % session["dna_ff"]) + elif dna_ff == "other_dna_ff": + a_script.append( + "rcp_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`" + % session["other_dna_ff_input"] + ) + a_script.append("\n") + + elif rcpType == "rnaRcp": + rnaFile = uploadedFiles["rnaFile"][0][1] rnaFile = rnaFile[:-4] - a_script.append('rcp_nm=%s # the file name of ligand without suffix `pdb`' % rnaFile ) - - rna_ff = session['rna_ff'] - if rna_ff != 'other_rna_ff': - a_script.append('rcp_ff=%s' % session['rna_ff']) - elif rna_ff == 'other_rna_ff': - a_script.append('rcp_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`' % session['other_rna_ff_input']) - a_script.append('\n') - - elif rcpType == 'carboRcp': - carboFile = uploadedFiles['carboFile'][0][1] + a_script.append( + "rcp_nm=%s # the file name of ligand without suffix `pdb`" % rnaFile + ) + + rna_ff = session["rna_ff"] + if rna_ff != "other_rna_ff": + a_script.append("rcp_ff=%s" % session["rna_ff"]) + elif rna_ff == "other_rna_ff": + a_script.append( + "rcp_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`" + % session["other_rna_ff_input"] + ) + a_script.append("\n") + + elif rcpType == "carboRcp": + carboFile = uploadedFiles["carboFile"][0][1] carboFile = carboFile[:-4] - a_script.append('rcp_nm=%s # the file name of ligand without suffix `pdb`' % carboFile ) - - carbo_ff = session['carbo_ff'] - if carbo_ff != 'other_carbo_ff': - a_script.append('rcp_ff=%s' % session['carbo_ff']) 
- elif carbo_ff == 'other_carbo_ff': - a_script.append('rcp_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`' % session['other_carbo_ff_input']) - a_script.append('\n') - - a_script.append('## Clean the PDB file by pdb4amber') - a_script.append(('pdb4amber -i ${rcp_nm}.pdb -o ${rcp_nm}_amber.pdb')) - a_script.append(''' + a_script.append( + "rcp_nm=%s # the file name of ligand without suffix `pdb`" % carboFile + ) + + carbo_ff = session["carbo_ff"] + if carbo_ff != "other_carbo_ff": + a_script.append("rcp_ff=%s" % session["carbo_ff"]) + elif carbo_ff == "other_carbo_ff": + a_script.append( + "rcp_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`" + % session["other_carbo_ff_input"] + ) + a_script.append("\n") + + a_script.append("## Clean the PDB file by pdb4amber") + a_script.append(("pdb4amber -i ${rcp_nm}.pdb -o ${rcp_nm}_amber.pdb")) + a_script.append( + """ ## `tleap` requires that all residues and atoms have appropriate types to ensure compatibility with the specified force field. ## To avoid `tleap` failing, we delete non-essential atoms, such as hydrogens, but preserve important atoms like carbon and nitrogen within the caps residues. -## Don' worry about the missing atoms as tleap has the capability to reconstruct them automatically. ''') - a_script.append('''awk '! ($2 ~ "(CH3|HH31|HH32|HH33)" || $3 ~ "(CH3|HH31|HH32|HH33)" )' ${rcp_nm}_amber.pdb > ${rcp_nm}_amber_f.pdb ''') +## Don' worry about the missing atoms as tleap has the capability to reconstruct them automatically. """ + ) + a_script.append( + """awk '! 
($2 ~ "(CH3|HH31|HH32|HH33)" || $3 ~ "(CH3|HH31|HH32|HH33)" )' ${rcp_nm}_amber.pdb > ${rcp_nm}_amber_f.pdb """ + ) a_script.append("grep -v '^CONECT' ${rcp_nm}_amber_f.pdb > ${rcp_nm}_cnt_rmv.pdb\n") # Ligand - if session['nmLig'] or session['spLig']: - a_script.append('################################## Ligand ######################################') - if session['nmLig']: - a_script.append('# Normal Ligand') - nmLigFile = uploadedFiles['nmLigFile'][0][1] - a_script.append('nmLigFile=%s # the file name of ligand without suffix `.pdb` or `.sdf`' % nmLigFile[:-4] ) + if session["nmLig"] or session["spLig"]: + a_script.append( + "################################## Ligand ######################################" + ) + if session["nmLig"]: + a_script.append("# Normal Ligand") + nmLigFile = uploadedFiles["nmLigFile"][0][1] + a_script.append( + "nmLigFile=%s # the file name of ligand without suffix `.pdb` or `.sdf`" + % nmLigFile[:-4] + ) # depending on the uploaded file format,convert it to pdb or sdf file. - if nmLigFile[-4:] == '.sdf': - a_script.append('obabel ${nmLigFile}.sdf -O ${nmLigFile}.pdb -p # convert to pdb file for tleap, -p: add hydrogens appropriate for pH7.4') - elif nmLigFile[-4:] == '.pdb': - a_script.append('obabel ${nmLigFile}.pdb -O ${nmLigFile}.sdf -p # convert to sdf file for openmmdl_analysis, -p: add hydrogens appropriate for pH7.4') - - a_script.append('charge_method=%s # refers to the charge method that antechamber will adopt' % session['charge_method'] ) - a_script.append('charge_value=%s # Enter the net molecular charge of the ligand as integer (e.g. 
1 or -2)' % session['charge_value']) - a_script.append('lig_ff=%s # Ligand force field \n' % session['lig_ff']) - - a_script.append('## Clean the PDB file by pdb4amber') - a_script.append(('pdb4amber -i ${nmLigFile}.pdb -o ${nmLigFile}_amber.pdb\n')) - - a_script.append('## Generate a prepc file and an additional frcmod file by `antechamber`') - a_script.append('antechamber -fi pdb -fo prepc -i ${nmLigFile}_amber.pdb -o ${nmLigFile}.prepc -c ${charge_method} -at ${lig_ff} -nc ${charge_value} -pf y') - a_script.append('parmchk2 -f prepc -i ${nmLigFile}.prepc -o ${nmLigFile}.frcmod\n') - a_script.append('## Rename ligand pdb') - a_script.append('antechamber -i ${nmLigFile}.prepc -fi prepc -o rename_${nmLigFile}.pdb -fo pdb\n') - - if session['spLig']: - a_script.append('# Special Ligand') - spLigFile = uploadedFiles['spLigFile'][0][1] - a_script.append('spLigFile=%s # the file name of ligand without suffix `.pdb`' % spLigFile[:-4] ) - prepcFile = uploadedFiles['prepcFile'][0][1] - prepcFile = prepcFile [:-6] - a_script.append('prepc=%s # the file name without suffix `prepc`' % prepcFile ) - - frcmodFile = uploadedFiles['frcmodFile'][0][1] + if nmLigFile[-4:] == ".sdf": + a_script.append( + "obabel ${nmLigFile}.sdf -O ${nmLigFile}.pdb -p # convert to pdb file for tleap, -p: add hydrogens appropriate for pH7.4" + ) + elif nmLigFile[-4:] == ".pdb": + a_script.append( + "obabel ${nmLigFile}.pdb -O ${nmLigFile}.sdf -p # convert to sdf file for openmmdl_analysis, -p: add hydrogens appropriate for pH7.4" + ) + + a_script.append( + "charge_method=%s # refers to the charge method that antechamber will adopt" + % session["charge_method"] + ) + a_script.append( + "charge_value=%s # Enter the net molecular charge of the ligand as integer (e.g. 
1 or -2)" + % session["charge_value"] + ) + a_script.append("lig_ff=%s # Ligand force field \n" % session["lig_ff"]) + + a_script.append("## Clean the PDB file by pdb4amber") + a_script.append(("pdb4amber -i ${nmLigFile}.pdb -o ${nmLigFile}_amber.pdb\n")) + + a_script.append( + "## Generate a prepc file and an additional frcmod file by `antechamber`" + ) + a_script.append( + "antechamber -fi pdb -fo prepc -i ${nmLigFile}_amber.pdb -o ${nmLigFile}.prepc -c ${charge_method} -at ${lig_ff} -nc ${charge_value} -pf y" + ) + a_script.append( + "parmchk2 -f prepc -i ${nmLigFile}.prepc -o ${nmLigFile}.frcmod\n" + ) + a_script.append("## Rename ligand pdb") + a_script.append( + "antechamber -i ${nmLigFile}.prepc -fi prepc -o rename_${nmLigFile}.pdb -fo pdb\n" + ) + + if session["spLig"]: + a_script.append("# Special Ligand") + spLigFile = uploadedFiles["spLigFile"][0][1] + a_script.append( + "spLigFile=%s # the file name of ligand without suffix `.pdb`" + % spLigFile[:-4] + ) + prepcFile = uploadedFiles["prepcFile"][0][1] + prepcFile = prepcFile[:-6] + a_script.append("prepc=%s # the file name without suffix `prepc`" % prepcFile) + + frcmodFile = uploadedFiles["frcmodFile"][0][1] frcmodFile = frcmodFile[:-7] - a_script.append('frcmod=%s # the file name without suffix `frcmod`\n' % frcmodFile ) + a_script.append( + "frcmod=%s # the file name without suffix `frcmod`\n" % frcmodFile + ) - a_script.append('## Clean the PDB file by pdb4amber') - a_script.append(('pdb4amber -i ${spLigFile}.pdb -o ${spLigFile}_amber.pdb\n')) + a_script.append("## Clean the PDB file by pdb4amber") + a_script.append(("pdb4amber -i ${spLigFile}.pdb -o ${spLigFile}_amber.pdb\n")) # get the name of ligand in the pdb file: it is the fourth column of the first line - a_script.append('spLigName=$(awk \'NR==1 {print $4}\' ${spLigFile}_amber.pdb)\n') + a_script.append("spLigName=$(awk 'NR==1 {print $4}' ${spLigFile}_amber.pdb)\n") # Combine all components to be modelled. 
- if session['nmLig'] or session['spLig']: - a_script.append('###################### Combine All Components to Be Modelled ####################') - a_script.append('cat > tleap.combine.in < tleap.combine.in < tleap.combine.out') + a_script.append("rcp = loadpdb ${rcp_nm}_cnt_rmv.pdb") + if session["nmLig"] and session["spLig"]: + a_script.append("nmLig = loadpdb rename_${nmLigFile}.pdb ") + a_script.append("spLig = loadpdb ${spLigFile}_amber.pdb ") + a_script.append("comp = combine{rcp nmLig spLig}") + elif session["nmLig"]: + a_script.append("nmLig = loadpdb rename_${nmLigFile}.pdb ") + a_script.append("comp = combine{rcp nmLig}") + elif session["spLig"]: + a_script.append("spLig = loadpdb ${spLigFile}_amber.pdb") + a_script.append("comp = combine {rcp spLig}") + + a_script.append("savepdb comp comp.pdb") + a_script.append("\nquit") + a_script.append("\nEOF\n") + a_script.append("tleap -s -f tleap.combine.in > tleap.combine.out") ## remove 'CONECT' line in the pdb file a_script.append("grep -v '^CONECT' comp.pdb > comp_cnt_rmv.pdb\n") # Add Water/Membrane - a_script.append('################################ Add Water/Membrane ##############################') + a_script.append( + "################################ Add Water/Membrane ##############################" + ) ## Box setting - addType = session['addType'] - if addType == 'addWater': - boxType = session['boxType'] - if boxType == 'cube': - a_script.append('boxType=solvatebox # `solvatebox`, a command in tleap, creates a cubic box ') - a_script.append('dist=%s # the minimum distance between any atom originally present in solute and the edge of the periodic box.' 
% session['dist']) - elif boxType == 'octahedron': - a_script.append('boxType=solvateoct # `solvateoct`, a command in tleap, creates a truncated octahedron box.') - a_script.append('dist=%s # the minimum distance between any atom originally present in solute and the edge of the periodic box' % session['dist']) - elif boxType == 'cap': - a_script.append('boxType=solvatecap # `solvatecap`, a command in tleap, creates a solvent cap around solute. In development!') - a_script.append('radius=%s # the radius of the sphere' % session['dist']) - elif boxType == 'shell': - a_script.append('boxType=solvateshell # `solvatecap`, a command in tleap, adds a solent shell to solute, which reflect the contours of the original solute molecule. ') - a_script.append('thickness=%s # the thickness of the shell' % session['dist']) - elif addType == 'addMembrane': - lipid_tp = session['lipid_tp'] - if lipid_tp != 'other_lipid_tp': - a_script.append('lipid_tp=%s' % session['lipid_tp']) - a_script.append('lipid_ratio=1') - elif lipid_tp == 'other_lipid_tp': - a_script.append('lipid_tp=%s # The command to check supported lipids: packmol-memgen --available_lipids' % session['other_lipid_tp_input']) - a_script.append(('lipid_ratio=%s # Set to 1 if only one lipid required'% session['lipid_ratio'])) - - lipid_ff = session['lipid_ff'] - if lipid_ff != 'other_lipid_ff': - a_script.append('lipid_ff=%s' % session['lipid_ff']) - elif lipid_ff == 'other_lipid_ff': - a_script.append('lipid_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`' % session['other_lipid_ff_input']) - - a_script.append('dist2Border=%s # The minimum distance between the maxmin values for x y and z to the box boundaries. Flag --dist' % session['dist2Border']) - a_script.append('padDist=%s # The width of the water layer over the membrane or protein in the z axis. 
Flag --dist_wat' % session['padDist'] ) + addType = session["addType"] + if addType == "addWater": + boxType = session["boxType"] + if boxType == "cube": + a_script.append( + "boxType=solvatebox # `solvatebox`, a command in tleap, creates a cubic box " + ) + a_script.append( + "dist=%s # the minimum distance between any atom originally present in solute and the edge of the periodic box." + % session["dist"] + ) + elif boxType == "octahedron": + a_script.append( + "boxType=solvateoct # `solvateoct`, a command in tleap, creates a truncated octahedron box." + ) + a_script.append( + "dist=%s # the minimum distance between any atom originally present in solute and the edge of the periodic box" + % session["dist"] + ) + elif boxType == "cap": + a_script.append( + "boxType=solvatecap # `solvatecap`, a command in tleap, creates a solvent cap around solute. In development!" + ) + a_script.append("radius=%s # the radius of the sphere" % session["dist"]) + elif boxType == "shell": + a_script.append( + "boxType=solvateshell # `solvatecap`, a command in tleap, adds a solent shell to solute, which reflect the contours of the original solute molecule. 
" + ) + a_script.append( + "thickness=%s # the thickness of the shell" % session["dist"] + ) + elif addType == "addMembrane": + lipid_tp = session["lipid_tp"] + if lipid_tp != "other_lipid_tp": + a_script.append("lipid_tp=%s" % session["lipid_tp"]) + a_script.append("lipid_ratio=1") + elif lipid_tp == "other_lipid_tp": + a_script.append( + "lipid_tp=%s # The command to check supported lipids: packmol-memgen --available_lipids" + % session["other_lipid_tp_input"] + ) + a_script.append( + ( + "lipid_ratio=%s # Set to 1 if only one lipid required" + % session["lipid_ratio"] + ) + ) + + lipid_ff = session["lipid_ff"] + if lipid_ff != "other_lipid_ff": + a_script.append("lipid_ff=%s" % session["lipid_ff"]) + elif lipid_ff == "other_lipid_ff": + a_script.append( + "lipid_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`" + % session["other_lipid_ff_input"] + ) + + a_script.append( + "dist2Border=%s # The minimum distance between the maxmin values for x y and z to the box boundaries. Flag --dist" + % session["dist2Border"] + ) + a_script.append( + "padDist=%s # The width of the water layer over the membrane or protein in the z axis. 
Flag --dist_wat" + % session["padDist"] + ) ## Water Setting - water_ff = session['water_ff'] - if water_ff != 'other_water_ff': - a_script.append('water_ff=%s' % session['water_ff']) - if addType == 'addWater': - water_ff = session['water_ff'] - if water_ff == 'tip3p': - a_script.append('solvent=%sBOX # set the water box' % session['water_ff'].upper()) - elif water_ff == 'fb3': - a_script.append('solvent=TIP3PFBOX # set the water box') - elif water_ff == 'spce': - a_script.append('solvent=SPCBOX # set the water box') - elif water_ff == 'tip4pew': - a_script.append('solvent=TIP4PEWBOX # set the water box') - elif water_ff == 'fb4': - a_script.append('solvent=TIP4PBOX # set the water box') - elif water_ff == 'opc': - a_script.append('solvent=OPCBOX # set the water box') - elif water_ff == 'opc3': - a_script.append('solvent=OPC3BOX # set the water box') - elif water_ff == 'other_water_ff': - a_script.append('water_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`' % session['other_water_ff_input']) - if addType == 'addWater': - a_script.append('solvent=%sBOX # set the water box' % session['other_water_ff_input'].upper()) - + water_ff = session["water_ff"] + if water_ff != "other_water_ff": + a_script.append("water_ff=%s" % session["water_ff"]) + if addType == "addWater": + water_ff = session["water_ff"] + if water_ff == "tip3p": + a_script.append( + "solvent=%sBOX # set the water box" % session["water_ff"].upper() + ) + elif water_ff == "fb3": + a_script.append("solvent=TIP3PFBOX # set the water box") + elif water_ff == "spce": + a_script.append("solvent=SPCBOX # set the water box") + elif water_ff == "tip4pew": + a_script.append("solvent=TIP4PEWBOX # set the water box") + elif water_ff == "fb4": + a_script.append("solvent=TIP4PBOX # set the water box") + elif water_ff == "opc": + a_script.append("solvent=OPCBOX # set the water box") + elif water_ff == "opc3": + a_script.append("solvent=OPC3BOX # set the water box") + elif 
water_ff == "other_water_ff": + a_script.append( + "water_ff=%s # See the supported force fields in the original file at `$AMBERHOME/dat/leap/cmd/`" + % session["other_water_ff_input"] + ) + if addType == "addWater": + a_script.append( + "solvent=%sBOX # set the water box" + % session["other_water_ff_input"].upper() + ) ## Ion Setting - pos_ion = session['pos_ion'] - if pos_ion != 'other_pos_ion': - a_script.append('pos_ion=%s' % session['pos_ion']) - elif pos_ion == 'other_pos_ion': - a_script.append('pos_ion=%s # In development!' % session['other_pos_ion_input']) - - neg_ion = session['neg_ion'] - if neg_ion != 'other_neg_ion': - a_script.append('neg_ion=%s' % session['neg_ion']) - elif neg_ion == 'other_neg_ion': - a_script.append('neg_ion=%s # In development!' % session['other_neg_ion_input']) - - if addType == 'addWater': - a_script.append('numIon=0 # `numIon` is the flag for `addions` in tleap. When set to 0, the system will be neutralized' ) - a_script.append('\n') - elif addType == 'addMembrane': - a_script.append('ionConc=%s' % session['ionConc']) - a_script.append('\n') - + pos_ion = session["pos_ion"] + if pos_ion != "other_pos_ion": + a_script.append("pos_ion=%s" % session["pos_ion"]) + elif pos_ion == "other_pos_ion": + a_script.append( + "pos_ion=%s # In development!" % session["other_pos_ion_input"] + ) + + neg_ion = session["neg_ion"] + if neg_ion != "other_neg_ion": + a_script.append("neg_ion=%s" % session["neg_ion"]) + elif neg_ion == "other_neg_ion": + a_script.append( + "neg_ion=%s # In development!" % session["other_neg_ion_input"] + ) + + if addType == "addWater": + a_script.append( + "numIon=0 # `numIon` is the flag for `addions` in tleap. 
When set to 0, the system will be neutralized" + ) + a_script.append("\n") + elif addType == "addMembrane": + a_script.append("ionConc=%s" % session["ionConc"]) + a_script.append("\n") + ## Build the membrane - if addType == 'addMembrane': - a_script.append('## Build the membrane') - if session['nmLig'] == False and session['spLig'] == False: - a_script.append('packmol-memgen --pdb ${rcp_nm}_cnt_rmv.pdb --lipids ${lipid_tp} --ratio ${lipid_ratio} --preoriented --dist ${dist2Border} --dist_wat ${padDist} --salt --salt_c ${pos_ion} --saltcon ${ionConc} --nottrim --overwrite --notprotonate\n') - a_script.append('## Clean the complex pdb by `pdb4amber` for further `tleap` process') - a_script.append('pdb4amber -i bilayer_${rcp_nm}_cnt_rmv.pdb -o clean_bilayer_${rcp_nm}.pdb') + if addType == "addMembrane": + a_script.append("## Build the membrane") + if session["nmLig"] == False and session["spLig"] == False: + a_script.append( + "packmol-memgen --pdb ${rcp_nm}_cnt_rmv.pdb --lipids ${lipid_tp} --ratio ${lipid_ratio} --preoriented --dist ${dist2Border} --dist_wat ${padDist} --salt --salt_c ${pos_ion} --saltcon ${ionConc} --nottrim --overwrite --notprotonate\n" + ) + a_script.append( + "## Clean the complex pdb by `pdb4amber` for further `tleap` process" + ) + a_script.append( + "pdb4amber -i bilayer_${rcp_nm}_cnt_rmv.pdb -o clean_bilayer_${rcp_nm}.pdb" + ) ## remove 'CONECT' line in the pdb file - a_script.append("grep -v '^CONECT' clean_bilayer_${rcp_nm}.pdb > clean_bilayer_${rcp_nm}_cnt_rmv.pdb") - a_script.append('\n') - if session['nmLig'] or session['spLig']: - a_script.append('packmol-memgen --pdb comp.pdb --lipids ${lipid_tp} --ratio ${lipid_ratio} --preoriented --dist ${dist2Border} --dist_wat ${padDist} --salt --salt_c ${pos_ion} --saltcon ${ionConc} --nottrim --overwrite --notprotonate\n') - a_script.append('## Clean the complex pdb by `pdb4amber` for further `tleap` process') - a_script.append('pdb4amber -i bilayer_comp.pdb -o clean_bilayer_comp.pdb') + 
a_script.append( + "grep -v '^CONECT' clean_bilayer_${rcp_nm}.pdb > clean_bilayer_${rcp_nm}_cnt_rmv.pdb" + ) + a_script.append("\n") + if session["nmLig"] or session["spLig"]: + a_script.append( + "packmol-memgen --pdb comp.pdb --lipids ${lipid_tp} --ratio ${lipid_ratio} --preoriented --dist ${dist2Border} --dist_wat ${padDist} --salt --salt_c ${pos_ion} --saltcon ${ionConc} --nottrim --overwrite --notprotonate\n" + ) + a_script.append( + "## Clean the complex pdb by `pdb4amber` for further `tleap` process" + ) + a_script.append("pdb4amber -i bilayer_comp.pdb -o clean_bilayer_comp.pdb") ## remove 'CONECT' line in the pdb file - a_script.append("grep -v '^CONECT' clean_bilayer_comp.pdb > clean_bilayer_comp_cnt_rmv.pdb") - a_script.append('\n') + a_script.append( + "grep -v '^CONECT' clean_bilayer_comp.pdb > clean_bilayer_comp_cnt_rmv.pdb" + ) + a_script.append("\n") # Generate the prmtop and frcmod file for the complex. - a_script.append('##################### Generate Prmtop and Frcmod File for the Complex ###################### ') - a_script.append('cat > tleap.in < tleap.in < tleap.out') + a_script.append("savepdb system system.${water_ff}.pdb") + a_script.append( + "saveamberparm system system.${water_ff}.prmtop system.${water_ff}.inpcrd" + ) + a_script.append("\nquit") + a_script.append("\nEOF") + a_script.append("\ntleap -s -f tleap.in > tleap.out") + + return "\n".join(a_script) - return '\n'.join(a_script) def extractLigName(LigFileName): - """ + """ Extract the ligand name from the pdb file as a string, which is the fourth column of the first line in the pdb file. This string can be used for openmmdl_analysis in later function `createScript`. 
@@ -556,22 +732,24 @@ def extractLigName(LigFileName): LigFile: the tuple that stores both the buffered file and its name, uploadedFiles['nmLigFile'][0][1] """ - - if LigFileName[-4:] == '.sdf': - LigName = 'UNL' - elif LigFileName[-4:] == '.pdb': + if LigFileName[-4:] == ".sdf": + LigName = "UNL" + elif LigFileName[-4:] == ".pdb": LigName = LigFileName[:-4] return LigName + ######################################################################################################################## -@app.route('/getCurrentStructure') + +@app.route("/getCurrentStructure") def getCurrentStructure(): pdb = StringIO() PDBFile.writeFile(fixer.topology, fixer.positions, pdb) return pdb.getvalue() + def showSelectChains(): chains = [] hasHeterogen = False @@ -584,23 +762,27 @@ def showSelectChains(): elif any(r in dnaResidues for r in residues): content = "DNA" else: - content = ', '.join(set(residues)) + content = ", ".join(set(residues)) hasHeterogen = True chains.append((chain.id, len(residues), content)) if len(chains) < 2 and not hasHeterogen: - session['heterogens'] = 'all' + session["heterogens"] = "all" return showAddResidues() - return render_template('selectChains.html', chains=chains) + return render_template("selectChains.html", chains=chains) -@app.route('/selectChains', methods=['POST']) + +@app.route("/selectChains", methods=["POST"]) def selectChains(): - session['heterogens'] = request.form.get('heterogens', '') + session["heterogens"] = request.form.get("heterogens", "") numChains = len(list(fixer.topology.chains())) - request.form.getlist('include') - deleteIndices = [i for i in range(numChains) if str(i) not in request.form.getlist('include')] + request.form.getlist("include") + deleteIndices = [ + i for i in range(numChains) if str(i) not in request.form.getlist("include") + ] fixer.removeChains(deleteIndices) return showAddResidues() + def showAddResidues(): spans = [] chains = list(fixer.topology.chains()) @@ -612,50 +794,63 @@ def showAddResidues(): 
chain = chains[key[0]] chainResidues = list(chain.residues()) if key[1] < len(chainResidues): - offset = int(chainResidues[key[1]].id)-len(residues)-1 + offset = int(chainResidues[key[1]].id) - len(residues) - 1 else: offset = int(chainResidues[-1].id) - spans.append((chain.id, offset+1, offset+len(residues), ', '.join(residues))) - return render_template('addResidues.html', spans=spans) + spans.append( + (chain.id, offset + 1, offset + len(residues), ", ".join(residues)) + ) + return render_template("addResidues.html", spans=spans) + -@app.route('/addResidues', methods=['POST']) +@app.route("/addResidues", methods=["POST"]) def addResidues(): keys = [key for key in sorted(fixer.missingResidues)] for i, key in enumerate(keys): - if str(i) not in request.form.getlist('add'): + if str(i) not in request.form.getlist("add"): del fixer.missingResidues[key] return showConvertResidues() + def showConvertResidues(): fixer.findNonstandardResidues() if len(fixer.nonstandardResidues) == 0: return showAddHeavyAtoms() residues = [] - nucleotides = ['DA', 'DC', 'DG', 'DT', 'A', 'C', 'G', 'T'] + nucleotides = ["DA", "DC", "DG", "DT", "A", "C", "G", "T"] for i in range(len(fixer.nonstandardResidues)): residue, replaceWith = fixer.nonstandardResidues[i] if replaceWith in proteinResidues: replacements = proteinResidues else: replacements = nucleotides - residues.append((residue.chain.id, residue.name, residue.id, replacements, replaceWith)) - return render_template('convertResidues.html', residues=residues) + residues.append( + (residue.chain.id, residue.name, residue.id, replacements, replaceWith) + ) + return render_template("convertResidues.html", residues=residues) + -@app.route('/convertResidues', methods=['POST']) +@app.route("/convertResidues", methods=["POST"]) def convertResidues(): for i in range(len(fixer.nonstandardResidues)): - if str(i) in request.form.getlist('convert'): - fixer.nonstandardResidues[i] = (fixer.nonstandardResidues[i][0], request.form['residue'+str(i)]) 
+ if str(i) in request.form.getlist("convert"): + fixer.nonstandardResidues[i] = ( + fixer.nonstandardResidues[i][0], + request.form["residue" + str(i)], + ) fixer.replaceNonstandardResidues() return showAddHeavyAtoms() + def showAddHeavyAtoms(): - if session['heterogens'] == 'none': + if session["heterogens"] == "none": fixer.removeHeterogens(False) - elif session['heterogens'] == 'water': + elif session["heterogens"] == "water": fixer.removeHeterogens(True) fixer.findMissingAtoms() - allResidues = list(set(fixer.missingAtoms.keys()).union(fixer.missingTerminals.keys())) + allResidues = list( + set(fixer.missingAtoms.keys()).union(fixer.missingTerminals.keys()) + ) allResidues.sort(key=lambda x: x.index) if len(allResidues) == 0: return addHeavyAtoms() @@ -666,58 +861,73 @@ def showAddHeavyAtoms(): atoms.extend(atom.name for atom in fixer.missingAtoms[residue]) if residue in fixer.missingTerminals: atoms.extend(atom for atom in fixer.missingTerminals[residue]) - residues.append((residue.chain.id, residue.name, residue.id, ', '.join(atoms))) - return render_template('addHeavyAtoms.html', residues=residues) + residues.append((residue.chain.id, residue.name, residue.id, ", ".join(atoms))) + return render_template("addHeavyAtoms.html", residues=residues) -@app.route('/addHeavyAtoms', methods=['POST']) + +@app.route("/addHeavyAtoms", methods=["POST"]) def addHeavyAtoms(): fixer.addMissingAtoms() return showAddHydrogens() + def showAddHydrogens(): unitCell = fixer.topology.getUnitCellDimensions() if unitCell is not None: unitCell = unitCell.value_in_unit(unit.nanometer) - boundingBox = tuple((max((pos[i] for pos in fixer.positions))-min((pos[i] for pos in fixer.positions))).value_in_unit(unit.nanometer) for i in range(3)) - return render_template('addHydrogens.html', unitCell=unitCell, boundingBox=boundingBox) - -@app.route('/addHydrogens', methods=['POST']) + boundingBox = tuple( + ( + max((pos[i] for pos in fixer.positions)) + - min((pos[i] for pos in 
fixer.positions)) + ).value_in_unit(unit.nanometer) + for i in range(3) + ) + return render_template( + "addHydrogens.html", unitCell=unitCell, boundingBox=boundingBox + ) + + +@app.route("/addHydrogens", methods=["POST"]) def addHydrogens(): - session['solvent'] = False - if 'addHydrogens' in request.form: - pH = float(request.form.get('ph', '7')) + session["solvent"] = False + if "addHydrogens" in request.form: + pH = float(request.form.get("ph", "7")) fixer.addMissingHydrogens(pH) - if 'addWater' in request.form: - session['solvent'] = True - session['add_membrane'] = False + if "addWater" in request.form: + session["solvent"] = True + session["add_membrane"] = False padding, boxSize, boxShape = None, None, None - if request.form['boxType'] == 'geometry': - session['water_padding'] = True - session['water_padding_distance'] = float(request.form['geomPadding']) - session['water_boxShape'] = request.form['geometryDropdown'] + if request.form["boxType"] == "geometry": + session["water_padding"] = True + session["water_padding_distance"] = float(request.form["geomPadding"]) + session["water_boxShape"] = request.form["geometryDropdown"] else: - session['water_padding'] = False - session['box_x'] = (float(request.form['boxx'])) - session['box_y'] = (float(request.form['boxy'])) - session['box_z'] = (float(request.form['boxz'])) - boxSize = (float(request.form['boxx']), float(request.form['boxy']), float(request.form['boxz'])) - session['water_ionicstrength'] = float(request.form['ionicstrength']) - session['water_positive'] = request.form['positiveion']+'+' - session['water_negative'] = request.form['negativeion']+'-' - elif 'addMembrane' in request.form: - session['solvent'] = True - session['add_membrane'] = True - session['lipidType'] = request.form['lipidType'] - session['membrane_padding'] = float(request.form['membranePadding']) - session['membrane_ionicstrength'] = float(request.form['ionicstrength']) - session['membrane_positive'] = 
request.form['positiveion']+'+' - session['membrane_negative'] = request.form['negativeion']+'-' - + session["water_padding"] = False + session["box_x"] = float(request.form["boxx"]) + session["box_y"] = float(request.form["boxy"]) + session["box_z"] = float(request.form["boxz"]) + boxSize = ( + float(request.form["boxx"]), + float(request.form["boxy"]), + float(request.form["boxz"]), + ) + session["water_ionicstrength"] = float(request.form["ionicstrength"]) + session["water_positive"] = request.form["positiveion"] + "+" + session["water_negative"] = request.form["negativeion"] + "-" + elif "addMembrane" in request.form: + session["solvent"] = True + session["add_membrane"] = True + session["lipidType"] = request.form["lipidType"] + session["membrane_padding"] = float(request.form["membranePadding"]) + session["membrane_ionicstrength"] = float(request.form["ionicstrength"]) + session["membrane_positive"] = request.form["positiveion"] + "+" + session["membrane_negative"] = request.form["negativeion"] + "-" + # Save the new PDB file. 
- - uploadedFiles['originalFile'] = uploadedFiles['file'] + + uploadedFiles["originalFile"] = uploadedFiles["file"] pdb = StringIO() - if session['pdbType'] == 'pdb': + if session["pdbType"] == "pdb": try: PDBFile.writeFile(fixer.topology, fixer.positions, pdb, True) except: @@ -727,123 +937,151 @@ def addHydrogens(): else: PDBxFile.writeFile(fixer.topology, fixer.positions, pdb, True) temp = tempfile.TemporaryFile() - temp.write(pdb.getvalue().encode('utf-8')) - name = uploadedFiles['file'][0][1] - dotIndex = name.rfind('.') + temp.write(pdb.getvalue().encode("utf-8")) + name = uploadedFiles["file"][0][1] + dotIndex = name.rfind(".") if dotIndex == -1: prefix = name - suffix = '' + suffix = "" else: prefix = name[:dotIndex] suffix = name[dotIndex:] - uploadedFiles['file'] = [(temp, prefix+'-processed_openMMDL'+suffix)] + uploadedFiles["file"] = [(temp, prefix + "-processed_openMMDL" + suffix)] return showSimulationOptions() -@app.route('/showSimulationOptions') + +@app.route("/showSimulationOptions") def showSimulationOptions(): - file_type = session.get('fileType', '') + file_type = session.get("fileType", "") # render buttons based on the fileType - if file_type == 'pdb': - return render_template('simulationOptions.html', display_save_script=True, display_processed_pdb=True, display_save_all_files=True) - elif file_type == 'amber': - return render_template('simulationOptions.html', display_save_script=True, display_processed_pdb=False, display_save_all_files=False) - -@app.route('/setSimulationOptions', methods=['POST']) + if file_type == "pdb": + return render_template( + "simulationOptions.html", + display_save_script=True, + display_processed_pdb=True, + display_save_all_files=True, + ) + elif file_type == "amber": + return render_template( + "simulationOptions.html", + display_save_script=True, + display_processed_pdb=False, + display_save_all_files=False, + ) + + +@app.route("/setSimulationOptions", methods=["POST"]) def setSimulationOptions(): for key in 
request.form: session[key] = request.form[key] - session['ligand'] = 'ligand' in request.form # store whether the ligand is present, so the retruned value can be 'True' or 'False' - session['writeDCD'] = 'writeDCD' in request.form - session['writeData'] = 'writeData' in request.form - session['writeCheckpoint'] = 'writeCheckpoint' in request.form - session['dataFields'] = request.form.getlist('dataFields') - session['hmr'] = 'hmr' in request.form - session['writeSimulationXml'] = 'writeSimulationXml' in request.form - session['writeFinalState'] = 'writeFinalState' in request.form + session["ligand"] = ( + "ligand" in request.form + ) # store whether the ligand is present, so the retruned value can be 'True' or 'False' + session["writeDCD"] = "writeDCD" in request.form + session["writeData"] = "writeData" in request.form + session["writeCheckpoint"] = "writeCheckpoint" in request.form + session["dataFields"] = request.form.getlist("dataFields") + session["hmr"] = "hmr" in request.form + session["writeSimulationXml"] = "writeSimulationXml" in request.form + session["writeFinalState"] = "writeFinalState" in request.form return createScript() -@app.route('/downloadScript') + +@app.route("/downloadScript") def downloadScript(): response = make_response(createScript()) - response.headers['Content-Disposition'] = 'attachment; filename="OpenMMDL_Simulation.py"' + response.headers["Content-Disposition"] = ( + 'attachment; filename="OpenMMDL_Simulation.py"' + ) return response -@app.route('/downloadStructuralfiles') + +@app.route("/downloadStructuralfiles") def downloadStructuralfiles(): - file, name = uploadedFiles['file'][0] + file, name = uploadedFiles["file"][0] file.seek(0, 0) response = make_response(file.read()) - response.headers['Content-Disposition'] = 'attachment; filename="%s"' % name + response.headers["Content-Disposition"] = 'attachment; filename="%s"' % name return response -@app.route('/downloadPackage') + +@app.route("/downloadPackage") def 
downloadPackage(): temp = tempfile.NamedTemporaryFile() - with zipfile.ZipFile(temp, 'w', zipfile.ZIP_DEFLATED) as zip: - zip.writestr('openmmdl_simulation/OpenMMDL_Simulation.py', createScript()) + with zipfile.ZipFile(temp, "w", zipfile.ZIP_DEFLATED) as zip: + zip.writestr("openmmdl_simulation/OpenMMDL_Simulation.py", createScript()) for key in uploadedFiles: for file, name in uploadedFiles[key]: file.seek(0, 0) - zip.writestr('openmmdl_simulation/%s' % name, file.read()) + zip.writestr("openmmdl_simulation/%s" % name, file.read()) temp.seek(0, 0) - return send_file(temp, 'application/zip', True, 'openmmdl_simulation.zip', max_age=0) + return send_file( + temp, "application/zip", True, "openmmdl_simulation.zip", max_age=0 + ) def configureDefaultOptions(): """Select default options based on the file format and force field.""" implicitWater = False - session['restart_checkpoint'] = 'no' - session['mdtraj_output'] = 'mdtraj_pdb_dcd' - session['mdtraj_removal'] = 'False' - session['mda_output'] = 'mda_pdb_dcd' - session['mda_selection'] = 'mda_prot_lig_all' - session['openmmdl_analysis'] = 'No' - session['analysis_selection'] = 'analysis_all' - session['binding_mode'] = '40' - session['min_transition'] = '1' - session['rmsd_diff'] = 'No' - session['pml_generation'] = 'True' - session['stable_water'] = 'Yes' - session['wc_distance'] = '1.0' - if session['fileType'] == 'pdb' and session['waterModel'] == 'implicit': + session["restart_checkpoint"] = "no" + session["mdtraj_output"] = "mdtraj_pdb_dcd" + session["mdtraj_removal"] = "False" + session["mda_output"] = "mda_pdb_dcd" + session["mda_selection"] = "mda_prot_lig_all" + session["openmmdl_analysis"] = "No" + session["analysis_selection"] = "analysis_all" + session["binding_mode"] = "40" + session["min_transition"] = "1" + session["rmsd_diff"] = "No" + session["pml_generation"] = "True" + session["stable_water"] = "Yes" + session["wc_distance"] = "1.0" + if session["fileType"] == "pdb" and session["waterModel"] == 
"implicit": implicitWater = True - session['ensemble'] = 'nvt' if implicitWater else 'npt' - session['platform'] = 'CUDA' - session['precision'] = 'mixed' - session['cutoff'] = '2.0' if implicitWater else '1.0' - session['ewaldTol'] = '0.0005' - session['constraintTol'] = '0.000001' - session['hmr'] = True - session['hmrMass'] = '1.5' - session['dt'] = '0.002' - session['sim_length'] = '50' - session['equilibration_length'] = '0.5' - session['temperature'] = '300' - session['friction'] = '1.0' - session['pressure'] = '1.0' - session['barostatInterval'] = '25' - session['nonbondedMethod'] = 'CutoffNonPeriodic' if implicitWater else 'PME' - session['writeDCD'] = True - session['dcdFilename'] = 'trajectory.dcd' - session['dcdFrames'] = '1000' - session['pdbInterval_ns'] = '10' - session['writeData'] = True - session['dataFilename'] = 'log.txt' - session['dataInterval'] = '1000' - session['dataFields'] = ['step', 'speed' ,'progress', 'potentialEnergy', 'temperature'] - session['writeCheckpoint'] = True - session['checkpointFilename'] = 'checkpoint.chk' - session['checkpointInterval_ns'] = '0.02' - session['writeSimulationXml'] = False - session['systemXmlFilename'] = 'system.xml' - session['integratorXmlFilename'] = 'integrator.xml' - session['writeFinalState'] = False - session['finalStateFileType'] = 'stateXML' - session['finalStateFilename'] = "final_state.xml" - session['constraints'] = 'hbonds' - session['rmsd'] = 'True' - session['md_postprocessing'] = 'True' + session["ensemble"] = "nvt" if implicitWater else "npt" + session["platform"] = "CUDA" + session["precision"] = "mixed" + session["cutoff"] = "2.0" if implicitWater else "1.0" + session["ewaldTol"] = "0.0005" + session["constraintTol"] = "0.000001" + session["hmr"] = True + session["hmrMass"] = "1.5" + session["dt"] = "0.002" + session["sim_length"] = "50" + session["equilibration_length"] = "0.5" + session["temperature"] = "300" + session["friction"] = "1.0" + session["pressure"] = "1.0" + 
session["barostatInterval"] = "25" + session["nonbondedMethod"] = "CutoffNonPeriodic" if implicitWater else "PME" + session["writeDCD"] = True + session["dcdFilename"] = "trajectory.dcd" + session["dcdFrames"] = "1000" + session["pdbInterval_ns"] = "10" + session["writeData"] = True + session["dataFilename"] = "log.txt" + session["dataInterval"] = "1000" + session["dataFields"] = [ + "step", + "speed", + "progress", + "potentialEnergy", + "temperature", + ] + session["writeCheckpoint"] = True + session["checkpointFilename"] = "checkpoint.chk" + session["checkpointInterval_ns"] = "0.02" + session["writeSimulationXml"] = False + session["systemXmlFilename"] = "system.xml" + session["integratorXmlFilename"] = "integrator.xml" + session["writeFinalState"] = False + session["finalStateFileType"] = "stateXML" + session["finalStateFilename"] = "final_state.xml" + session["constraints"] = "hbonds" + session["rmsd"] = "True" + session["md_postprocessing"] = "True" + def createScript(isInternal=False): script = [] @@ -852,7 +1090,8 @@ def createScript(isInternal=False): # to set the working directory and redirect stdout to the pipe. if isInternal: - script.append(""" + script.append( + """ import os import sys import time @@ -863,12 +1102,17 @@ def write(self, string): sys.stdout = PipeOutput() sys.stderr = PipeOutput() -os.chdir(outputDir)""") +os.chdir(outputDir)""" + ) # Header - - script.append('# This script was generated by OpenMM-MDL Setup on %s.\n' % datetime.date.today()) - script.append(''' + + script.append( + "# This script was generated by OpenMM-MDL Setup on %s.\n" + % datetime.date.today() + ) + script.append( + """ # ,-----. .-------. .-''-. ,---. .--.,---. ,---.,---. ,---. ______ .---. # .' .-, '. \ _(`)_ \ .'_ _ \ | \ | || \ / || \ / || _ `''. 
| ,_| # / ,-.| \ _ \ | (_ o._)| / ( ` ) '| , \ | || , \/ , || , \/ , || _ | ) _ \,-./ ) @@ -880,223 +1124,309 @@ def write(self, string): # '-----' `---' `'-..-' '--' '--''--' '--''--' '--''-----'` `--------` -''') - script.append('from openmmdl.openmmdl_simulation.scripts.forcefield_water import ff_selection, water_forcefield_selection, water_model_selection, generate_forcefield, generate_transitional_forcefield') - script.append('from openmmdl.openmmdl_simulation.scripts.protein_ligand_prep import prepare_ligand, rdkit_to_openmm, merge_protein_and_ligand, water_padding_solvent_builder, water_absolute_solvent_builder, membrane_builder, water_conversion') - script.append('from openmmdl.openmmdl_simulation.scripts.post_md_conversions import mdtraj_conversion, MDanalysis_conversion') - script.append('from openmmdl.openmmdl_simulation.scripts.cleaning_procedures import cleanup, create_directory_if_not_exists, copy_file, organize_files, post_md_file_movement \n') - - script.append('import simtk.openmm.app as app') - script.append('from simtk.openmm.app import PDBFile, Modeller, PDBReporter, StateDataReporter, DCDReporter, CheckpointReporter, AmberPrmtopFile, AmberInpcrdFile') - script.append('from simtk.openmm import unit, Platform, MonteCarloBarostat, LangevinMiddleIntegrator') - script.append('from simtk.openmm import Vec3') - script.append('import simtk.openmm as mm') - script.append('import pdbfixer') - script.append('import sys') - script.append('import os') - script.append('import shutil') - if session['openmmdl_analysis'] == 'Yes': - script.append('import subprocess') - +""" + ) + script.append( + "from openmmdl.openmmdl_simulation.scripts.forcefield_water import ff_selection, water_forcefield_selection, water_model_selection, generate_forcefield, generate_transitional_forcefield" + ) + script.append( + "from openmmdl.openmmdl_simulation.scripts.protein_ligand_prep import prepare_ligand, rdkit_to_openmm, merge_protein_and_ligand, water_padding_solvent_builder, 
water_absolute_solvent_builder, membrane_builder, water_conversion" + ) + script.append( + "from openmmdl.openmmdl_simulation.scripts.post_md_conversions import mdtraj_conversion, MDanalysis_conversion" + ) + script.append( + "from openmmdl.openmmdl_simulation.scripts.cleaning_procedures import cleanup, create_directory_if_not_exists, copy_file, organize_files, post_md_file_movement \n" + ) + + script.append("import simtk.openmm.app as app") + script.append( + "from simtk.openmm.app import PDBFile, Modeller, PDBReporter, StateDataReporter, DCDReporter, CheckpointReporter, AmberPrmtopFile, AmberInpcrdFile" + ) + script.append( + "from simtk.openmm import unit, Platform, MonteCarloBarostat, LangevinMiddleIntegrator" + ) + script.append("from simtk.openmm import Vec3") + script.append("import simtk.openmm as mm") + script.append("import pdbfixer") + script.append("import sys") + script.append("import os") + script.append("import shutil") + if session["openmmdl_analysis"] == "Yes": + script.append("import subprocess") + # Input files - script.append('\n# Input Files') - fileType = session['fileType'] - if fileType == 'pdb': - script.append('''############# Ligand and Protein Data ###################''') - script.append('''######## Add the Ligand SDF File and Protein PDB File in the Folder with the Script ######### \n''') - pdbType = session['pdbType'] - if pdbType == 'pdb': - script.append('protein = "%s"' % uploadedFiles['file'][0][1]) - if session['sdfFile'] != '': - script.append("ligand = '%s'" % session['sdfFile']) + script.append("\n# Input Files") + fileType = session["fileType"] + if fileType == "pdb": + script.append("""############# Ligand and Protein Data ###################""") + script.append( + """######## Add the Ligand SDF File and Protein PDB File in the Folder with the Script ######### \n""" + ) + pdbType = session["pdbType"] + if pdbType == "pdb": + script.append('protein = "%s"' % uploadedFiles["file"][0][1]) + if session["sdfFile"] != "": + 
script.append("ligand = '%s'" % session["sdfFile"]) script.append('ligand_name = "UNK"') - script.append("minimization = %s" % session['ligandMinimization']) - script.append("sanitization = %s" % session['ligandSanitization']) - forcefield = session['forcefield'] - water = session['waterModel'] - elif fileType == 'amber': - script.append('''####### Add the Amber Files in the Folder with this Script ####### \n''') + script.append("minimization = %s" % session["ligandMinimization"]) + script.append("sanitization = %s" % session["ligandSanitization"]) + forcefield = session["forcefield"] + water = session["waterModel"] + elif fileType == "amber": + script.append( + """####### Add the Amber Files in the Folder with this Script ####### \n""" + ) # amber_files related variables - if session['has_files'] == 'yes': - script.append("prmtop_file = '%s'" % uploadedFiles['prmtopFile'][0][1]) - script.append('inpcrd_file = "%s"' % uploadedFiles['inpcrdFile'][0][1]) + if session["has_files"] == "yes": + script.append("prmtop_file = '%s'" % uploadedFiles["prmtopFile"][0][1]) + script.append('inpcrd_file = "%s"' % uploadedFiles["inpcrdFile"][0][1]) # ligand related variables - if session['nmLig'] == True: - nmLigName = session['nmLigName'] # e.g. 'UNL' + if session["nmLig"] == True: + nmLigName = session["nmLigName"] # e.g. 'UNL' else: nmLigName = None - if session['spLig'] == True: # success - spLigName = session['spLigName'] # e.g. 'HEME' + if session["spLig"] == True: # success + spLigName = session["spLigName"] # e.g. 
'HEME' else: - spLigName = None + spLigName = None - elif session['has_files'] == 'no': - script.append("prmtop_file = 'system.%s.prmtop'" % session['water_ff']) - script.append("inpcrd_file = 'system.%s.inpcrd' " % session['water_ff']) + elif session["has_files"] == "no": + script.append("prmtop_file = 'system.%s.prmtop'" % session["water_ff"]) + script.append("inpcrd_file = 'system.%s.inpcrd' " % session["water_ff"]) # ligand related variables - if session['nmLig'] == True: - nmLigFileName = uploadedFiles['nmLigFile'][0][1] # e.g. '8QY.pdb' - nmLigName = extractLigName(nmLigFileName) # e.g '8QY' or 'UNL' # resname in topology + if session["nmLig"] == True: + nmLigFileName = uploadedFiles["nmLigFile"][0][1] # e.g. '8QY.pdb' + nmLigName = extractLigName( + nmLigFileName + ) # e.g '8QY' or 'UNL' # resname in topology else: nmLigFileName = None nmLigName = None - if session['spLig'] == True: # success - spLigFileName = uploadedFiles['spLigFile'][0][1] + if session["spLig"] == True: # success + spLigFileName = uploadedFiles["spLigFile"][0][1] spLigName = extractLigName(spLigFileName) else: spLigFileName = None - spLigName = None - + spLigName = None ## Feed prmtop_file and inpcrd_file to OpenMM Reader script.append("prmtop = AmberPrmtopFile(prmtop_file)") script.append("inpcrd = AmberInpcrdFile(inpcrd_file)") - + ##################### For debugging ################################# # print all key-value pairs in session # print(f"session is {session}") - print(f'fileType is {fileType}') + print(f"fileType is {fileType}") print(f"session['has_files'] is {session['has_files']}") print(f"session['nmLig'] is {session['nmLig']}") print(f"session['spLig'] is {session['spLig']}") - print(f'nmLigName is {nmLigName}') - print(f'spLigName is {spLigName}') - #print(f"nmLigFileName is {nmLigFileName}") - #print(f"nmLigName is {nmLigName}") - #print(f"spLigFileName is {spLigFileName}") - #print(f"spLigName is {spLigName}") + print(f"nmLigName is {nmLigName}") + print(f"spLigName 
is {spLigName}") + # print(f"nmLigFileName is {nmLigFileName}") + # print(f"nmLigName is {nmLigName}") + # print(f"spLigFileName is {spLigFileName}") + # print(f"spLigName is {spLigName}") ###################################################################### - if fileType == 'pdb': - script.append('''\n############# Forcefield, Water and Membrane Model Selection ###################\n''') - script.append("ff = '%s'" % session['forcefield']) - if water != 'None': + if fileType == "pdb": + script.append( + """\n############# Forcefield, Water and Membrane Model Selection ###################\n""" + ) + script.append("ff = '%s'" % session["forcefield"]) + if water != "None": script.append("water = '%s'" % water) else: script.append("water = %s" % water) - -################################## IF CLEANING WAS PERFORMED ############################################## -########################################################################################################### -########################################################################################################### - if fileType == 'pdb': - if session['solvent'] == True: - if session['add_membrane'] == True: - script.append('''\n############# Membrane Settings ###################\n''') - script.append("add_membrane = %s" % session['add_membrane']) - script.append("membrane_lipid_type = '%s'" % session['lipidType']) - script.append("membrane_padding = %s" % session['membrane_padding']) - script.append("membrane_ionicstrength = %s" % session['membrane_ionicstrength']) - script.append("membrane_positive_ion = '%s'" % session['membrane_positive']) - script.append("membrane_negative_ion = '%s'" % session['membrane_negative']) - elif session['add_membrane'] == False: - script.append('''\n############# Water Box Settings ###################\n''') - script.append("add_membrane = %s" % session['add_membrane']) - if session['water_padding'] == True: + ################################## IF CLEANING WAS PERFORMED 
############################################## + ########################################################################################################### + ########################################################################################################### + if fileType == "pdb": + if session["solvent"] == True: + if session["add_membrane"] == True: + script.append( + """\n############# Membrane Settings ###################\n""" + ) + script.append("add_membrane = %s" % session["add_membrane"]) + script.append("membrane_lipid_type = '%s'" % session["lipidType"]) + script.append("membrane_padding = %s" % session["membrane_padding"]) + script.append( + "membrane_ionicstrength = %s" % session["membrane_ionicstrength"] + ) + script.append( + "membrane_positive_ion = '%s'" % session["membrane_positive"] + ) + script.append( + "membrane_negative_ion = '%s'" % session["membrane_negative"] + ) + elif session["add_membrane"] == False: + script.append( + """\n############# Water Box Settings ###################\n""" + ) + script.append("add_membrane = %s" % session["add_membrane"]) + if session["water_padding"] == True: script.append('Water_Box = "Buffer"') - script.append("water_padding_distance = %s" % session['water_padding_distance']) - script.append("water_boxShape = '%s'" % session['water_boxShape']) + script.append( + "water_padding_distance = %s" + % session["water_padding_distance"] + ) + script.append("water_boxShape = '%s'" % session["water_boxShape"]) else: script.append('Water_Box = "Absolute"') - script.append("water_box_x = %s" % session['box_x']) - script.append("water_box_y = %s" % session['box_y']) - script.append("water_box_z = %s" % session['box_z']) - script.append("water_ionicstrength = %s" % session['water_ionicstrength']) - script.append("water_positive_ion = '%s'" % session['water_positive']) - script.append("water_negative_ion = '%s'" % session['water_negative']) + script.append("water_box_x = %s" % session["box_x"]) + 
script.append("water_box_y = %s" % session["box_y"]) + script.append("water_box_z = %s" % session["box_z"]) + script.append( + "water_ionicstrength = %s" % session["water_ionicstrength"] + ) + script.append("water_positive_ion = '%s'" % session["water_positive"]) + script.append("water_negative_ion = '%s'" % session["water_negative"]) else: - if session['solvent'] == False: - script.append("Solvent = %s" % session['solvent']) + if session["solvent"] == False: + script.append("Solvent = %s" % session["solvent"]) + ################################## IF CLEANING WAS NOT PERFORMED ########################################## + ########################################################################################################### + ########################################################################################################### -################################## IF CLEANING WAS NOT PERFORMED ########################################## -########################################################################################################### -########################################################################################################### - # System configuration - script.append('\n# System Configuration\n') - nonbondedMethod = session['nonbondedMethod'] - script.append('nonbondedMethod = app.%s' % nonbondedMethod) - if nonbondedMethod != 'NoCutoff': - script.append('nonbondedCutoff = %s*unit.nanometers' % session['cutoff']) - if nonbondedMethod == 'PME': - script.append('ewaldErrorTolerance = %s' % session['ewaldTol']) - constraints = session['constraints'] - constraintMethods = {'none': 'None', - 'water': 'None', - 'hbonds': 'HBonds', - 'allbonds': 'AllBonds'} - if constraints != 'none' and constraints != 'water': - script.append('constraints = app.%s' % constraintMethods[constraints]) - if constraints == 'none': - script.append('constraints = %s' % constraintMethods[constraints]) - script.append('rigidWater = %s' % ('False' if constraints == 
'none' else 'True')) - if constraints != 'none': - script.append('constraintTolerance = %s' % session['constraintTol']) - if session['hmr']: - script.append('hydrogenMass = %s*unit.amu' % session['hmrMass']) + script.append("\n# System Configuration\n") + nonbondedMethod = session["nonbondedMethod"] + script.append("nonbondedMethod = app.%s" % nonbondedMethod) + if nonbondedMethod != "NoCutoff": + script.append("nonbondedCutoff = %s*unit.nanometers" % session["cutoff"]) + if nonbondedMethod == "PME": + script.append("ewaldErrorTolerance = %s" % session["ewaldTol"]) + constraints = session["constraints"] + constraintMethods = { + "none": "None", + "water": "None", + "hbonds": "HBonds", + "allbonds": "AllBonds", + } + if constraints != "none" and constraints != "water": + script.append("constraints = app.%s" % constraintMethods[constraints]) + if constraints == "none": + script.append("constraints = %s" % constraintMethods[constraints]) + script.append("rigidWater = %s" % ("False" if constraints == "none" else "True")) + if constraints != "none": + script.append("constraintTolerance = %s" % session["constraintTol"]) + if session["hmr"]: + script.append("hydrogenMass = %s*unit.amu" % session["hmrMass"]) # Integration options - script.append('\n# Integration Options\n') - script.append('step_time = %s' % session['dt']) - script.append('dt = %s*unit.picoseconds' % session['dt']) - script.append('temperature = %s*unit.kelvin' % session['temperature']) - script.append('friction = %s/unit.picosecond' % session['friction']) - ensemble = session['ensemble'] - if ensemble == 'npt': - script.append('pressure = %s*unit.atmospheres' % session['pressure']) - script.append('barostatInterval = %s' % session['barostatInterval']) + script.append("\n# Integration Options\n") + script.append("step_time = %s" % session["dt"]) + script.append("dt = %s*unit.picoseconds" % session["dt"]) + script.append("temperature = %s*unit.kelvin" % session["temperature"]) + script.append("friction = 
%s/unit.picosecond" % session["friction"]) + ensemble = session["ensemble"] + if ensemble == "npt": + script.append("pressure = %s*unit.atmospheres" % session["pressure"]) + script.append("barostatInterval = %s" % session["barostatInterval"]) # Simulation options - script.append('\n# Simulation Options\n') - script.append('sim_length = %s' % session['sim_length']) - script.append('steps = int(sim_length / step_time * 1000)') - script.extend(['dcdFrames = %s' % session['dcdFrames'], 'dcdInterval = int(steps / dcdFrames)']) - script.extend(['pdbInterval_ns = %s' % session['pdbInterval_ns'], 'pdbInterval = int(steps * (pdbInterval_ns / sim_length))']) - if session['restart_checkpoint'] == 'yes': - script.append('restart_step = %s' % session['restart_step']) - script.append('equilibration_length = %s' % session['equilibration_length']) - script.append('equilibrationSteps = int(equilibration_length / step_time * 1000)') - script.append("platform = Platform.getPlatformByName('%s')" % session['platform']) - if session['platform'] in ('CUDA', 'OpenCL'): - script.append("platformProperties = {'Precision': '%s'}" % session['precision']) - if session['writeDCD']: - if session['restart_checkpoint'] == 'yes': - script.append("dcdReporter = DCDReporter('%s_%s', dcdInterval)" % (session['restart_step'], session['dcdFilename'])) + script.append("\n# Simulation Options\n") + script.append("sim_length = %s" % session["sim_length"]) + script.append("steps = int(sim_length / step_time * 1000)") + script.extend( + [ + "dcdFrames = %s" % session["dcdFrames"], + "dcdInterval = int(steps / dcdFrames)", + ] + ) + script.extend( + [ + "pdbInterval_ns = %s" % session["pdbInterval_ns"], + "pdbInterval = int(steps * (pdbInterval_ns / sim_length))", + ] + ) + if session["restart_checkpoint"] == "yes": + script.append("restart_step = %s" % session["restart_step"]) + script.append("equilibration_length = %s" % session["equilibration_length"]) + script.append("equilibrationSteps = 
int(equilibration_length / step_time * 1000)") + script.append("platform = Platform.getPlatformByName('%s')" % session["platform"]) + if session["platform"] in ("CUDA", "OpenCL"): + script.append("platformProperties = {'Precision': '%s'}" % session["precision"]) + if session["writeDCD"]: + if session["restart_checkpoint"] == "yes": + script.append( + "dcdReporter = DCDReporter('%s_%s', dcdInterval)" + % (session["restart_step"], session["dcdFilename"]) + ) else: - script.append("dcdReporter = DCDReporter('%s', dcdInterval)" % (session['dcdFilename'])) - if session['writeData']: - args = ', '.join('%s=True' % field for field in session['dataFields']) - if session['restart_checkpoint'] == 'yes': - script.append("dataReporter = StateDataReporter('%s_%s', %s, totalSteps=steps," % (session['restart_step'], session['dataFilename'], session['dataInterval'])) + script.append( + "dcdReporter = DCDReporter('%s', dcdInterval)" + % (session["dcdFilename"]) + ) + if session["writeData"]: + args = ", ".join("%s=True" % field for field in session["dataFields"]) + if session["restart_checkpoint"] == "yes": + script.append( + "dataReporter = StateDataReporter('%s_%s', %s, totalSteps=steps," + % ( + session["restart_step"], + session["dataFilename"], + session["dataInterval"], + ) + ) else: - script.append("dataReporter = StateDataReporter('%s', %s, totalSteps=steps," % (session['dataFilename'], session['dataInterval'])) + script.append( + "dataReporter = StateDataReporter('%s', %s, totalSteps=steps," + % (session["dataFilename"], session["dataInterval"]) + ) script.append(" %s, separator='\\t')" % args) if isInternal: # Create a second reporting sending to stdout so we can display it in the browser. 
- script.append("consoleReporter = StateDataReporter(sys.stdout, %s, totalSteps=steps, %s, separator='\\t')" % (session['dataInterval'], args)) - if session['writeCheckpoint']: - script.append("checkpointInterval = int(1000 * %s / %s)" % (session['checkpointInterval_ns'], session['dt'])) - if session['restart_checkpoint'] == 'yes': - script.append("checkpointReporter = CheckpointReporter('%s_%s', checkpointInterval)" % (session['restart_step'], session['checkpointFilename'])) - script.append("checkpointReporter10 = CheckpointReporter('10x_%s__%s', checkpointInterval *10)" % (session['restart_step'], session['checkpointFilename'])) - script.append("checkpointReporter100 = CheckpointReporter('100x_%s_%s', checkpointInterval *100)" % (session['restart_step'], session['checkpointFilename'])) + script.append( + "consoleReporter = StateDataReporter(sys.stdout, %s, totalSteps=steps, %s, separator='\\t')" + % (session["dataInterval"], args) + ) + if session["writeCheckpoint"]: + script.append( + "checkpointInterval = int(1000 * %s / %s)" + % (session["checkpointInterval_ns"], session["dt"]) + ) + if session["restart_checkpoint"] == "yes": + script.append( + "checkpointReporter = CheckpointReporter('%s_%s', checkpointInterval)" + % (session["restart_step"], session["checkpointFilename"]) + ) + script.append( + "checkpointReporter10 = CheckpointReporter('10x_%s__%s', checkpointInterval *10)" + % (session["restart_step"], session["checkpointFilename"]) + ) + script.append( + "checkpointReporter100 = CheckpointReporter('100x_%s_%s', checkpointInterval *100)" + % (session["restart_step"], session["checkpointFilename"]) + ) else: - script.append("checkpointReporter = CheckpointReporter('%s', checkpointInterval)" % (session['checkpointFilename'])) - script.append("checkpointReporter10 = CheckpointReporter('10x_%s', checkpointInterval* 10)" % (session['checkpointFilename'])) - script.append("checkpointReporter100 = CheckpointReporter('100x_%s', checkpointInterval* 100)" % 
(session['checkpointFilename'])) - + script.append( + "checkpointReporter = CheckpointReporter('%s', checkpointInterval)" + % (session["checkpointFilename"]) + ) + script.append( + "checkpointReporter10 = CheckpointReporter('10x_%s', checkpointInterval* 10)" + % (session["checkpointFilename"]) + ) + script.append( + "checkpointReporter100 = CheckpointReporter('100x_%s', checkpointInterval* 100)" + % (session["checkpointFilename"]) + ) + # Prepare the simulation - - if fileType == 'pdb': - if session['sdfFile'] != '': - script.append(''' + + if fileType == "pdb": + if session["sdfFile"] != "": + script.append( + """ print("Preparing MD Simulation with ligand") ligand_prepared = prepare_ligand(ligand,minimize_molecule=minimization) omm_ligand = rdkit_to_openmm(ligand_prepared, ligand_name) @@ -1109,9 +1439,11 @@ def write(self, string): transitional_forcefield = generate_transitional_forcefield(protein_ff=forcefield_selected, solvent_ff=water_selected, add_membrane=add_membrane, rdkit_mol=ligand_prepared) forcefield = generate_forcefield(protein_ff=forcefield_selected, solvent_ff=water_selected, add_membrane=add_membrane, rdkit_mol=ligand_prepared) complex_topology, complex_positions = merge_protein_and_ligand(protein_pdb, omm_ligand) -print("Complex topology has", complex_topology.getNumAtoms(), "atoms.") ''') - elif session['sdfFile'] == '': - script.append(''' +print("Complex topology has", complex_topology.getNumAtoms(), "atoms.") """ + ) + elif session["sdfFile"] == "": + script.append( + """ protein_pdb = PDBFile(protein) forcefield_selected = ff_selection(ff) water_selected = water_forcefield_selection(water=water,forcefield_selection=ff_selection(ff)) @@ -1122,9 +1454,11 @@ def write(self, string): else: forcefield = app.ForceField(forcefield_selected) if add_membrane == True: - transitional_forcefield = generate_transitional_forcefield(protein_ff=forcefield_selected, solvent_ff=water_selected, add_membrane=add_membrane, rdkit_mol=None) ''') - if 
session['sdfFile'] == '': - script.append(''' + transitional_forcefield = generate_transitional_forcefield(protein_ff=forcefield_selected, solvent_ff=water_selected, add_membrane=add_membrane, rdkit_mol=None) """ + ) + if session["sdfFile"] == "": + script.append( + """ forcefield = generate_forcefield(protein_ff=forcefield_selected, solvent_ff=water_selected, add_membrane=add_membrane, rdkit_mol=None) modeller = app.Modeller(protein_pdb.topology, protein_pdb.positions) if add_membrane == True: @@ -1138,9 +1472,11 @@ def write(self, string): if model_water == 'tip4pew' or model_water == 'tip5p': water_conversion(model_water, modeller, protein) topology = modeller.topology -positions = modeller.positions ''') - elif session['sdfFile'] != '': - script.append(''' +positions = modeller.positions """ + ) + elif session["sdfFile"] != "": + script.append( + """ modeller = app.Modeller(complex_topology, complex_positions) if add_membrane == True: membrane_builder(ff, model_water, forcefield, transitional_forcefield, protein_pdb, modeller, membrane_lipid_type, membrane_padding, membrane_positive_ion, membrane_negative_ion, membrane_ionicstrength, protein) @@ -1153,243 +1489,630 @@ def write(self, string): if model_water == 'tip4pew' or model_water == 'tip5p': water_conversion(model_water, modeller, protein) topology = modeller.topology -positions = modeller.positions ''') - elif fileType == 'amber': - script.append('topology = prmtop.topology') - script.append('positions = inpcrd.positions') +positions = modeller.positions """ + ) + elif fileType == "amber": + script.append("topology = prmtop.topology") + script.append("positions = inpcrd.positions") - script.append('\n# Prepare the Simulation\n') + script.append("\n# Prepare the Simulation\n") script.append("print('Building system...')") - hmrOptions = ', hydrogenMass=hydrogenMass' if session['hmr'] else '' - if fileType == 'pdb': - script.append('system = forcefield.createSystem(topology, 
nonbondedMethod=nonbondedMethod,%s' % (' nonbondedCutoff=nonbondedCutoff,' if nonbondedMethod != 'NoCutoff' else '')) - script.append(' constraints=constraints, rigidWater=rigidWater%s%s)' % (', ewaldErrorTolerance=ewaldErrorTolerance' if nonbondedMethod == 'PME' else '', hmrOptions)) - elif fileType == 'amber': - script.append('system = prmtop.createSystem(nonbondedMethod=nonbondedMethod,%s' % (' nonbondedCutoff=nonbondedCutoff,' if nonbondedMethod != 'NoCutoff' else '')) - script.append(' constraints=constraints, rigidWater=rigidWater%s%s)' % (', ewaldErrorTolerance=ewaldErrorTolerance' if nonbondedMethod == 'PME' else '', hmrOptions)) - if ensemble == 'npt': - script.append('system.addForce(MonteCarloBarostat(pressure, temperature, barostatInterval))') - script.append('integrator = LangevinMiddleIntegrator(temperature, friction, dt)') - if constraints != 'none': - script.append('integrator.setConstraintTolerance(constraintTolerance)') - script.append('simulation = app.Simulation(topology, system, integrator, platform%s)' % (', platformProperties' if session['platform'] in ('CUDA', 'OpenCL') else '')) - script.append('simulation.context.setPositions(positions)') - if fileType == 'amber': - script.append('if inpcrd.boxVectors is not None:') - script.append(' simulation.context.setPeriodicBoxVectors(*inpcrd.boxVectors)') + hmrOptions = ", hydrogenMass=hydrogenMass" if session["hmr"] else "" + if fileType == "pdb": + script.append( + "system = forcefield.createSystem(topology, nonbondedMethod=nonbondedMethod,%s" + % ( + " nonbondedCutoff=nonbondedCutoff," + if nonbondedMethod != "NoCutoff" + else "" + ) + ) + script.append( + " constraints=constraints, rigidWater=rigidWater%s%s)" + % ( + ( + ", ewaldErrorTolerance=ewaldErrorTolerance" + if nonbondedMethod == "PME" + else "" + ), + hmrOptions, + ) + ) + elif fileType == "amber": + script.append( + "system = prmtop.createSystem(nonbondedMethod=nonbondedMethod,%s" + % ( + " nonbondedCutoff=nonbondedCutoff," + if 
nonbondedMethod != "NoCutoff" + else "" + ) + ) + script.append( + " constraints=constraints, rigidWater=rigidWater%s%s)" + % ( + ( + ", ewaldErrorTolerance=ewaldErrorTolerance" + if nonbondedMethod == "PME" + else "" + ), + hmrOptions, + ) + ) + if ensemble == "npt": + script.append( + "system.addForce(MonteCarloBarostat(pressure, temperature, barostatInterval))" + ) + script.append("integrator = LangevinMiddleIntegrator(temperature, friction, dt)") + if constraints != "none": + script.append("integrator.setConstraintTolerance(constraintTolerance)") + script.append( + "simulation = app.Simulation(topology, system, integrator, platform%s)" + % (", platformProperties" if session["platform"] in ("CUDA", "OpenCL") else "") + ) + script.append("simulation.context.setPositions(positions)") + if fileType == "amber": + script.append("if inpcrd.boxVectors is not None:") + script.append( + " simulation.context.setPeriodicBoxVectors(*inpcrd.boxVectors)" + ) # Output XML files for system and integrator - if session['writeSimulationXml']: + if session["writeSimulationXml"]: + def _xml_script_segment(to_serialize, target_file): if target_file == "": # if filename is blank, we cannot create the file return [] return [ f'with open("{target_file}", mode="w") as file:', - f' file.write(XmlSerializer.serialize({to_serialize}))' + f" file.write(XmlSerializer.serialize({to_serialize}))", ] script.append("\n# Write XML serialized objects\n") - script.extend(_xml_script_segment('system', session['systemXmlFilename'])) - script.extend(_xml_script_segment('integrator', session['integratorXmlFilename'])) + script.extend(_xml_script_segment("system", session["systemXmlFilename"])) + script.extend( + _xml_script_segment("integrator", session["integratorXmlFilename"]) + ) # Minimize and equilibrate - - script.append('\n# Minimize and Equilibrate\n') + + script.append("\n# Minimize and Equilibrate\n") script.append("print('Performing energy minimization...')") - 
script.append('simulation.minimizeEnergy()') - if fileType == 'pdb': - script.append(""" + script.append("simulation.minimizeEnergy()") + if fileType == "pdb": + script.append( + """ with open(f'Energyminimization_{protein}', 'w') as outfile: PDBFile.writeFile(modeller.topology, modeller.positions, outfile) - """) - elif fileType == 'amber': - script.append(""" + """ + ) + elif fileType == "amber": + script.append( + """ with open(f'Energyminimization_{prmtop_file[:-7]}.pdb', 'w') as outfile: PDBFile.writeFile(prmtop.topology, inpcrd.positions, outfile) - """) + """ + ) script.append("print('Equilibrating...')") - script.append('simulation.context.setVelocitiesToTemperature(temperature)') - script.append('simulation.step(equilibrationSteps)') - if fileType == 'pdb': - script.append(""" + script.append("simulation.context.setVelocitiesToTemperature(temperature)") + script.append("simulation.step(equilibrationSteps)") + if fileType == "pdb": + script.append( + """ with open(f'Equilibration_{protein}', 'w') as outfile: PDBFile.writeFile(modeller.topology, modeller.positions, outfile) - """) - elif fileType == 'amber': - script.append(""" + """ + ) + elif fileType == "amber": + script.append( + """ with open(f'Equilibration_{prmtop_file[:-7]}.pdb', 'w') as outfile: PDBFile.writeFile(prmtop.topology, inpcrd.positions, outfile) - """) - if session['restart_checkpoint'] == 'yes': - script.append("simulation.loadCheckpoint('%s')" % session['checkpointFilename']) - + """ + ) + if session["restart_checkpoint"] == "yes": + script.append("simulation.loadCheckpoint('%s')" % session["checkpointFilename"]) + # Simulate - - script.append('\n# Simulate\n') + + script.append("\n# Simulate\n") script.append("print('Simulating...')") - if session['restart_checkpoint'] == 'yes': - if fileType == 'pdb': - script.append("simulation.reporters.append(PDBReporter(f'restart_output_{protein}', pdbInterval))") - elif fileType == 'amber': - 
script.append("simulation.reporters.append(PDBReporter(f'restart_output_{prmtop_file}', pdbInterval))") + if session["restart_checkpoint"] == "yes": + if fileType == "pdb": + script.append( + "simulation.reporters.append(PDBReporter(f'restart_output_{protein}', pdbInterval))" + ) + elif fileType == "amber": + script.append( + "simulation.reporters.append(PDBReporter(f'restart_output_{prmtop_file}', pdbInterval))" + ) else: - if fileType == 'pdb': - script.append("simulation.reporters.append(PDBReporter(f'output_{protein}', pdbInterval))") - elif fileType == 'amber': - script.append("simulation.reporters.append(PDBReporter(f'output_{prmtop_file[:-7]}.pdb', pdbInterval))") - if session['writeDCD']: - script.append('simulation.reporters.append(dcdReporter)') - if session['writeData']: - script.append('simulation.reporters.append(dataReporter)') + if fileType == "pdb": + script.append( + "simulation.reporters.append(PDBReporter(f'output_{protein}', pdbInterval))" + ) + elif fileType == "amber": + script.append( + "simulation.reporters.append(PDBReporter(f'output_{prmtop_file[:-7]}.pdb', pdbInterval))" + ) + if session["writeDCD"]: + script.append("simulation.reporters.append(dcdReporter)") + if session["writeData"]: + script.append("simulation.reporters.append(dataReporter)") if isInternal: - script.append('simulation.reporters.append(consoleReporter)') - if session['writeCheckpoint']: - script.append('simulation.reporters.append(checkpointReporter)') - script.append('simulation.reporters.append(checkpointReporter10)') - script.append('simulation.reporters.append(checkpointReporter100)') - script.append('simulation.reporters.append(StateDataReporter(sys.stdout, 1000, step=True, potentialEnergy=True, temperature=True))') - if session['restart_checkpoint'] == 'yes': - script.append('simulation.currentStep = %s' % session ['restart_step']) + script.append("simulation.reporters.append(consoleReporter)") + if session["writeCheckpoint"]: + 
script.append("simulation.reporters.append(checkpointReporter)") + script.append("simulation.reporters.append(checkpointReporter10)") + script.append("simulation.reporters.append(checkpointReporter100)") + script.append( + "simulation.reporters.append(StateDataReporter(sys.stdout, 1000, step=True, potentialEnergy=True, temperature=True))" + ) + if session["restart_checkpoint"] == "yes": + script.append("simulation.currentStep = %s" % session["restart_step"]) else: - script.append('simulation.currentStep = 0') - script.append('simulation.step(steps)') + script.append("simulation.currentStep = 0") + script.append("simulation.step(steps)") # Output final simulation state - if session['writeFinalState']: + if session["writeFinalState"]: script.append("\n# Write file with final simulation state\n") state_script = { - 'checkpoint': ['simulation.saveCheckpoint("{filename}")'], - 'stateXML': ['simulation.saveState("{filename}")'], - 'pdbx': ['state = simulation.context.getState(getPositions=True, enforcePeriodicBox=system.usesPeriodicBoundaryConditions())', - 'with open("{filename}", mode="w") as file:', - ' PDBxFile.writeFile(simulation.topology, state.getPositions(), file)'], - }[session['finalStateFileType']] - lines = [line.format(filename=session['finalStateFilename']) for line in state_script] + "checkpoint": ['simulation.saveCheckpoint("{filename}")'], + "stateXML": ['simulation.saveState("{filename}")'], + "pdbx": [ + "state = simulation.context.getState(getPositions=True, enforcePeriodicBox=system.usesPeriodicBoundaryConditions())", + 'with open("{filename}", mode="w") as file:', + " PDBxFile.writeFile(simulation.topology, state.getPositions(), file)", + ], + }[session["finalStateFileType"]] + lines = [ + line.format(filename=session["finalStateFilename"]) for line in state_script + ] script.extend(lines) - # session[md_postprocessing] - if session ['md_postprocessing'] == 'True': + if session["md_postprocessing"] == "True": # mdtraj_conversion() and 
MDanalysis_conversion() if fileType == "pdb": - script.append("mdtraj_conversion(f'Equilibration_{protein}', '%s')" % session['mdtraj_output']) - if session['sdfFile']: - if session['mdtraj_output'] != 'mdtraj_gro_xtc': - script.append("MDanalysis_conversion('centered_old_coordinates_top.pdb', 'centered_old_coordinates.dcd', mda_output='%s', output_selection='%s', ligand_name='UNK')" % (session['mda_output'], session['mda_selection'])) - elif session['mdtraj_output'] == 'mdtraj_gro_xtc': - script.append("MDanalysis_conversion('centered_old_coordinates_top.gro', 'centered_old_coordinates.xtc', mda_output='%s', output_selection='%s', ligand_name='UNK')" % (session['mda_output'], session['mda_selection'])) - elif session['sdfFile'] == '': - if session['mdtraj_output'] != 'mdtraj_gro_xtc': - script.append("MDanalysis_conversion('centered_old_coordinates_top.pdb', 'centered_old_coordinates.dcd', mda_output='%s', output_selection='%s')" % (session['mda_output'], session['mda_selection'])) - elif session['mdtraj_output'] == 'mdtraj_gro_xtc': - script.append("MDanalysis_conversion('centered_old_coordinates_top.gro', 'centered_old_coordinates.xtc', mda_output='%s', output_selection='%s')" % (session['mda_output'], session['mda_selection'])) + script.append( + "mdtraj_conversion(f'Equilibration_{protein}', '%s')" + % session["mdtraj_output"] + ) + if session["sdfFile"]: + if session["mdtraj_output"] != "mdtraj_gro_xtc": + script.append( + "MDanalysis_conversion('centered_old_coordinates_top.pdb', 'centered_old_coordinates.dcd', mda_output='%s', output_selection='%s', ligand_name='UNK')" + % (session["mda_output"], session["mda_selection"]) + ) + elif session["mdtraj_output"] == "mdtraj_gro_xtc": + script.append( + "MDanalysis_conversion('centered_old_coordinates_top.gro', 'centered_old_coordinates.xtc', mda_output='%s', output_selection='%s', ligand_name='UNK')" + % (session["mda_output"], session["mda_selection"]) + ) + elif session["sdfFile"] == "": + if 
session["mdtraj_output"] != "mdtraj_gro_xtc": + script.append( + "MDanalysis_conversion('centered_old_coordinates_top.pdb', 'centered_old_coordinates.dcd', mda_output='%s', output_selection='%s')" + % (session["mda_output"], session["mda_selection"]) + ) + elif session["mdtraj_output"] == "mdtraj_gro_xtc": + script.append( + "MDanalysis_conversion('centered_old_coordinates_top.gro', 'centered_old_coordinates.xtc', mda_output='%s', output_selection='%s')" + % (session["mda_output"], session["mda_selection"]) + ) elif fileType == "amber": - script.append("mdtraj_conversion(prmtop_file, '%s')" % session['mdtraj_output']) - if session['nmLig'] == False and session['spLig'] == False: - if session['mdtraj_output'] != 'mdtraj_gro_xtc': - script.append("MDanalysis_conversion('centered_old_coordinates_top.pdb', 'centered_old_coordinates.dcd', mda_output='%s', output_selection='%s')" % (session['mda_output'], session['mda_selection'])) - elif session['mdtraj_output'] == 'mdtraj_gro_xtc': - script.append("MDanalysis_conversion('centered_old_coordinates_top.gro', 'centered_old_coordinates.xtc', mda_output='%s', output_selection='%s')" % (session['mda_output'], session['mda_selection'])) - elif session['nmLig'] and session['spLig'] == False: - if session['mdtraj_output'] != 'mdtraj_gro_xtc': - script.append("MDanalysis_conversion('centered_old_coordinates_top.pdb', 'centered_old_coordinates.dcd', mda_output='%s', output_selection='%s', ligand_name=%s)" % (session['mda_output'], session['mda_selection'], f"'{nmLigName}'")) - elif session['mdtraj_output'] == 'mdtraj_gro_xtc': - script.append("MDanalysis_conversion('centered_old_coordinates_top.gro', 'centered_old_coordinates.xtc', mda_output='%s', output_selection='%s', ligand_name=%s)" % (session['mda_output'], session['mda_selection'], f"'{nmLigName}'")) - elif session['nmLig'] and session['spLig']: - if session['mdtraj_output'] != 'mdtraj_gro_xtc': - script.append("MDanalysis_conversion('centered_old_coordinates_top.pdb', 
'centered_old_coordinates.dcd', mda_output='%s', output_selection='%s', ligand_name=%s, special_ligname=%s)" % (session['mda_output'], session['mda_selection'], f"'{nmLigName}'", f"'{spLigName}'")) - elif session['mdtraj_output'] == 'mdtraj_gro_xtc': - script.append("MDanalysis_conversion('centered_old_coordinates_top.gro', 'centered_old_coordinates.xtc', mda_output='%s', output_selection='%s', ligand_name=%s, special_ligname=%s)" % (session['mda_output'], session['mda_selection'], f"'{nmLigName}'", f"'{spLigName}'")) + script.append( + "mdtraj_conversion(prmtop_file, '%s')" % session["mdtraj_output"] + ) + if session["nmLig"] == False and session["spLig"] == False: + if session["mdtraj_output"] != "mdtraj_gro_xtc": + script.append( + "MDanalysis_conversion('centered_old_coordinates_top.pdb', 'centered_old_coordinates.dcd', mda_output='%s', output_selection='%s')" + % (session["mda_output"], session["mda_selection"]) + ) + elif session["mdtraj_output"] == "mdtraj_gro_xtc": + script.append( + "MDanalysis_conversion('centered_old_coordinates_top.gro', 'centered_old_coordinates.xtc', mda_output='%s', output_selection='%s')" + % (session["mda_output"], session["mda_selection"]) + ) + elif session["nmLig"] and session["spLig"] == False: + if session["mdtraj_output"] != "mdtraj_gro_xtc": + script.append( + "MDanalysis_conversion('centered_old_coordinates_top.pdb', 'centered_old_coordinates.dcd', mda_output='%s', output_selection='%s', ligand_name=%s)" + % ( + session["mda_output"], + session["mda_selection"], + f"'{nmLigName}'", + ) + ) + elif session["mdtraj_output"] == "mdtraj_gro_xtc": + script.append( + "MDanalysis_conversion('centered_old_coordinates_top.gro', 'centered_old_coordinates.xtc', mda_output='%s', output_selection='%s', ligand_name=%s)" + % ( + session["mda_output"], + session["mda_selection"], + f"'{nmLigName}'", + ) + ) + elif session["nmLig"] and session["spLig"]: + if session["mdtraj_output"] != "mdtraj_gro_xtc": + script.append( + 
"MDanalysis_conversion('centered_old_coordinates_top.pdb', 'centered_old_coordinates.dcd', mda_output='%s', output_selection='%s', ligand_name=%s, special_ligname=%s)" + % ( + session["mda_output"], + session["mda_selection"], + f"'{nmLigName}'", + f"'{spLigName}'", + ) + ) + elif session["mdtraj_output"] == "mdtraj_gro_xtc": + script.append( + "MDanalysis_conversion('centered_old_coordinates_top.gro', 'centered_old_coordinates.xtc', mda_output='%s', output_selection='%s', ligand_name=%s, special_ligname=%s)" + % ( + session["mda_output"], + session["mda_selection"], + f"'{nmLigName}'", + f"'{spLigName}'", + ) + ) # cleanup() - if session['mdtraj_removal'] == "True": + if session["mdtraj_removal"] == "True": if fileType == "pdb": script.append("cleanup(f'{protein}')") - elif fileType == 'amber': + elif fileType == "amber": script.append("cleanup(f'{prmtop_file}')") - # post_md_file_movement() + # post_md_file_movement() if fileType == "pdb": - if session['sdfFile']: + if session["sdfFile"]: script.append("post_md_file_movement(protein,ligands=[ligand])") - elif session['sdfFile'] == '': + elif session["sdfFile"] == "": script.append("post_md_file_movement(protein)") elif fileType == "amber": - if session['has_files'] == 'yes': # In this case, no ligand file will be uploaded, thus not neccessary to assign value to argument `ligands` - script.append("post_md_file_movement(protein_name=f'{prmtop_file[:-7]}.pdb', prmtop=prmtop_file, inpcrd=inpcrd_file)") - elif session['has_files'] == 'no': - if session['nmLig'] == False and session['spLig'] == False: - script.append("post_md_file_movement(protein_name=f'{prmtop_file[:-7]}.pdb', prmtop=prmtop_file, inpcrd=inpcrd_file)") - elif session['nmLig'] and session['spLig'] == False: - script.append("post_md_file_movement(protein_name=f'{prmtop_file[:-7]}.pdb', prmtop=prmtop_file, inpcrd=inpcrd_file, ligands=['%s'])" % nmLigFileName) - elif session['nmLig'] and session['spLig']: - 
script.append("post_md_file_movement(protein_name=f'{prmtop_file[:-7]}.pdb', prmtop=prmtop_file, inpcrd=inpcrd_file, ligands=['%s', '%s'])" % (nmLigFileName, spLigFileName)) + if ( + session["has_files"] == "yes" + ): # In this case, no ligand file will be uploaded, thus not neccessary to assign value to argument `ligands` + script.append( + "post_md_file_movement(protein_name=f'{prmtop_file[:-7]}.pdb', prmtop=prmtop_file, inpcrd=inpcrd_file)" + ) + elif session["has_files"] == "no": + if session["nmLig"] == False and session["spLig"] == False: + script.append( + "post_md_file_movement(protein_name=f'{prmtop_file[:-7]}.pdb', prmtop=prmtop_file, inpcrd=inpcrd_file)" + ) + elif session["nmLig"] and session["spLig"] == False: + script.append( + "post_md_file_movement(protein_name=f'{prmtop_file[:-7]}.pdb', prmtop=prmtop_file, inpcrd=inpcrd_file, ligands=['%s'])" + % nmLigFileName + ) + elif session["nmLig"] and session["spLig"]: + script.append( + "post_md_file_movement(protein_name=f'{prmtop_file[:-7]}.pdb', prmtop=prmtop_file, inpcrd=inpcrd_file, ligands=['%s', '%s'])" + % (nmLigFileName, spLigFileName) + ) # session[openmmdl_analysis] - if session['openmmdl_analysis'] == "Yes": - if session['mdtraj_output'] != 'mdtraj_gro_xtc': + if session["openmmdl_analysis"] == "Yes": + if session["mdtraj_output"] != "mdtraj_gro_xtc": top_ext = ".pdb" traj_ext = ".dcd" - elif session['mdtraj_output'] == 'mdtraj_gro_xtc': + elif session["mdtraj_output"] == "mdtraj_gro_xtc": top_ext = ".gro" traj_ext = ".xtc" - # session[analysis_selection] == 'analysis_all' - if session['analysis_selection'] == 'analysis_all': + # session[analysis_selection] == 'analysis_all' + if session["analysis_selection"] == "analysis_all": script.append("os.chdir('Final_Output/All_Atoms')") if fileType == "pdb": - if session['sdfFile']: - script.append("analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -l %s -n UNK -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, 
traj_ext, session['sdfFile'], session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif session['sdfFile'] == '': - script.append("analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) + if session["sdfFile"]: + script.append( + "analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -l %s -n UNK -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + session["sdfFile"], + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif session["sdfFile"] == "": + script.append( + "analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) elif fileType == "amber": - if session['nmLig'] == False and session['spLig'] == False: - script.append("analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif session['nmLig'] and session['spLig'] == False: - script.append("analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -n %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, nmLigName, session['binding_mode'], 
session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif session['nmLig'] and session['spLig']: - script.append("analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -n %s -s %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, nmLigName, spLigName, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) + if session["nmLig"] == False and session["spLig"] == False: + script.append( + "analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif session["nmLig"] and session["spLig"] == False: + script.append( + "analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -n %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + nmLigName, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif session["nmLig"] and session["spLig"]: + script.append( + "analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -n %s -s %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + nmLigName, + spLigName, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) # session[analysis_selection] == 'analysis_prot' - elif session['analysis_selection'] == 'analysis_prot_lig': + elif session["analysis_selection"] == "analysis_prot_lig": script.append("os.chdir('Final_Output/Prot_Lig')") 
if fileType == "pdb": - if session['sdfFile']: - script.append("analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -l %s -n UNK -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['sdfFile'], session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif session['sdfFile'] == '': - script.append("analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif fileType == 'amber': - if session['nmLig'] == False and session['spLig'] == False: - script.append("analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif session['nmLig'] and session['spLig'] == False: - script.append("analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -n %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, nmLigName, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif session['nmLig'] and session['spLig']: - script.append("analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -n %s -s %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, nmLigName, spLigName, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) + if session["sdfFile"]: + script.append( + 
"analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -l %s -n UNK -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + session["sdfFile"], + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif session["sdfFile"] == "": + script.append( + "analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif fileType == "amber": + if session["nmLig"] == False and session["spLig"] == False: + script.append( + "analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif session["nmLig"] and session["spLig"] == False: + script.append( + "analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -n %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + nmLigName, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif session["nmLig"] and session["spLig"]: + script.append( + "analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -n %s -s %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + nmLigName, + spLigName, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + 
session["wc_distance"], + ) + ) # session[analysis_selection] == 'analysis_all_prot' - elif session['analysis_selection'] == 'analysis_all_prot_lig': + elif session["analysis_selection"] == "analysis_all_prot_lig": if fileType == "pdb": script.append("os.chdir('Final_Output/All_Atoms')") - if session['sdfFile']: - script.append("analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -l %s -n UNK -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['sdfFile'], session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) + if session["sdfFile"]: + script.append( + "analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -l %s -n UNK -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + session["sdfFile"], + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) script.append("os.chdir('../Prot_Lig')") - script.append("analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -l %s -n UNK -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['sdfFile'], session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif session['sdfFile'] == '': - script.append("analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) + script.append( + "analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -l %s -n UNK -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + 
session["sdfFile"], + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif session["sdfFile"] == "": + script.append( + "analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) script.append("os.chdir('../Prot_Lig')") - script.append("analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif fileType == 'amber': + script.append( + "analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif fileType == "amber": script.append("os.chdir('Final_Output/All_Atoms')") - if session['nmLig'] == False and session['spLig'] == False: - script.append("analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) + if session["nmLig"] == False and session["spLig"] == False: + script.append( + "analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + 
session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) script.append("os.chdir('../Prot_Lig')") - script.append("analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif session['nmLig'] and session['spLig'] == False: - script.append("analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -n %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, nmLigName, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) + script.append( + "analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif session["nmLig"] and session["spLig"] == False: + script.append( + "analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -n %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + nmLigName, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) script.append("os.chdir('../Prot_Lig')") - script.append("analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -n %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, nmLigName, session['binding_mode'], session['min_transition'], session['rmsd_diff'], 
session['pml_generation'], session['stable_water'], session['wc_distance'])) - elif session['nmLig'] and session['spLig']: - script.append("analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -n %s -s %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, nmLigName, spLigName, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) + script.append( + "analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -n %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + nmLigName, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) + elif session["nmLig"] and session["spLig"]: + script.append( + "analysis_run_command = 'openmmdl_analysis -t centered_top%s -d centered_traj%s -n %s -s %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + nmLigName, + spLigName, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + session["stable_water"], + session["wc_distance"], + ) + ) script.append("os.chdir('../Prot_Lig')") - script.append("analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -n %s -s %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " % (top_ext, traj_ext, nmLigName, spLigName, session['binding_mode'], session['min_transition'], session['rmsd_diff'], session['pml_generation'], session['stable_water'], session['wc_distance'])) - + script.append( + "analysis_run_command = 'openmmdl_analysis -t prot_lig_top%s -d prot_lig_traj%s -n %s -s %s -b %s -m %s -r %s -p %s -w %s --watereps %s' " + % ( + top_ext, + traj_ext, + nmLigName, + spLigName, + session["binding_mode"], + session["min_transition"], + session["rmsd_diff"], + session["pml_generation"], + 
session["stable_water"], + session["wc_distance"], + ) + ) + return "\n".join(script) @@ -1398,11 +2121,12 @@ def main(): def open_browser(): # Give the server a moment to start before opening the browser. time.sleep(1) - url = 'http://127.0.0.1:5000' + url = "http://127.0.0.1:5000" webbrowser.open(url) threading.Thread(target=open_browser).start() app.run(debug=False) -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/openmmdl/openmmdl_simulation/openmmdlsimulation.py b/openmmdl/openmmdl_simulation/openmmdlsimulation.py index 729df012..7d8ac5e0 100644 --- a/openmmdl/openmmdl_simulation/openmmdlsimulation.py +++ b/openmmdl/openmmdl_simulation/openmmdlsimulation.py @@ -2,44 +2,68 @@ mmdl_simulation.py Perform Simulations of Protein-ligand complexes with OpenMM """ + import argparse import sys import os import shutil import argparse -parser = argparse.ArgumentParser() - -logo = '\n'.join([" ,-----. .-------. .-''-. ,---. .--.,---. ,---.,---. ,---. ______ .---. ", - " .' .-, '. \ _(`)_ \ .'_ _ \ | \ | || \ / || \ / || _ `''. | ,_| ", - " / ,-.| \ _ \ | (_ o._)| / ( ` ) '| , \ | || , \/ , || , \/ , || _ | ) _ \,-./ ) ", - " ; \ '_ / | :| (_,_) /. (_ o _) || |\_ \| || |\_ /| || |\_ /| ||( ''_' ) |\ '_ '`) ", - " | _`,/ \ _/ || '-.-' | (_,_)___|| _( )_\ || _( )_/ | || _( )_/ | || . (_) `. | > (_) ) ", - " : ( '\_/ \ ;| | ' \ .---.| (_ o _) || (_ o _) | || (_ o _) | ||(_ ._) '( . .-' ", - " \ `_/ \ ) / | | \ `-' /| (_,_)\ || (_,_) | || (_,_) | || (_.\.' / `-'`-'|___ ", - " '. \_/``'.' / ) \ / | | | || | | || | | || .' | \ ", - " '-----' `---' `'-..-' '--' '--''--' '--''--' '--''-----'` `--------` ", - " Prepare and Perform OpenMM Protein-Ligand MD Simulations ", - " Alpha Version "]) +parser = argparse.ArgumentParser() +logo = "\n".join( + [ + " ,-----. .-------. .-''-. ,---. .--.,---. ,---.,---. ,---. ______ .---. ", + " .' .-, '. \ _(`)_ \ .'_ _ \ | \ | || \ / || \ / || _ `''. 
| ,_| ", + " / ,-.| \ _ \ | (_ o._)| / ( ` ) '| , \ | || , \/ , || , \/ , || _ | ) _ \,-./ ) ", + " ; \ '_ / | :| (_,_) /. (_ o _) || |\_ \| || |\_ /| || |\_ /| ||( ''_' ) |\ '_ '`) ", + " | _`,/ \ _/ || '-.-' | (_,_)___|| _( )_\ || _( )_/ | || _( )_/ | || . (_) `. | > (_) ) ", + " : ( '\_/ \ ;| | ' \ .---.| (_ o _) || (_ o _) | || (_ o _) | ||(_ ._) '( . .-' ", + " \ `_/ \ ) / | | \ `-' /| (_,_)\ || (_,_) | || (_,_) | || (_.\.' / `-'`-'|___ ", + " '. \_/``'.' / ) \ / | | | || | | || | | || .' | \ ", + " '-----' `---' `'-..-' '--' '--''--' '--''--' '--''-----'` `--------` ", + " Prepare and Perform OpenMM Protein-Ligand MD Simulations ", + " Alpha Version ", + ] +) def main(): - parser = argparse.ArgumentParser(prog='openmmdl_simulation', description=logo, formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument('-f', dest='folder', type=str, help='Folder Name for MD Simulation', required=True,) - parser.add_argument('-s', dest='script', type=str, help='MD Simulation script', required=True,) - parser.add_argument('-t', dest='topology', help='Protein Topology PDB/Amber File', required=True) - parser.add_argument('-l', dest='ligand', help='SDF File of Ligand', default=None) - parser.add_argument('-c', dest='coordinate', help='Amber coordinates file', default=None) - input_formats = ['.py', '.pdb', '.sdf', '.mol', '.prmtop', '.inpcrd'] + parser = argparse.ArgumentParser( + prog="openmmdl_simulation", + description=logo, + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "-f", + dest="folder", + type=str, + help="Folder Name for MD Simulation", + required=True, + ) + parser.add_argument( + "-s", + dest="script", + type=str, + help="MD Simulation script", + required=True, + ) + parser.add_argument( + "-t", dest="topology", help="Protein Topology PDB/Amber File", required=True + ) + parser.add_argument("-l", dest="ligand", help="SDF File of Ligand", default=None) + parser.add_argument( + "-c", dest="coordinate", help="Amber 
coordinates file", default=None + ) + input_formats = [".py", ".pdb", ".sdf", ".mol", ".prmtop", ".inpcrd"] args = parser.parse_args() if not os.path.exists(args.folder): os.mkdir(args.folder) else: shutil.rmtree(args.folder) os.mkdir(args.folder) - script_dir = os.path.abspath( os.path.dirname( __file__ )) + script_dir = os.path.abspath(os.path.dirname(__file__)) if os.path.exists(args.folder): if input_formats[0] in args.script: if os.path.exists(args.script): @@ -76,7 +100,7 @@ def main(): print("Wrong Format, don't forget the .inpcrd of the coordinate file") os.chdir(args.folder) os.system(f"python3 *.py") - + + if __name__ == "__main__": main() - diff --git a/openmmdl/openmmdl_simulation/scripts/cleaning_procedures.py b/openmmdl/openmmdl_simulation/scripts/cleaning_procedures.py index 0dc00cd9..b0b4f38a 100644 --- a/openmmdl/openmmdl_simulation/scripts/cleaning_procedures.py +++ b/openmmdl/openmmdl_simulation/scripts/cleaning_procedures.py @@ -14,9 +14,9 @@ def cleanup(protein_name): """ print("Cleaning Up :)") try: - os.remove(f'output_{protein_name}') - os.remove(f'centered_old_coordinates.pdb') - os.remove(f'centered_old_coordinates.dcd') + os.remove(f"output_{protein_name}") + os.remove(f"centered_old_coordinates.pdb") + os.remove(f"centered_old_coordinates.dcd") except FileNotFoundError: print("One or more files not found. 
Cleanup skipped.") print("Cleanup is done.") @@ -112,7 +112,8 @@ def post_md_file_movement( ] destination_pre_md = "MD_Files/Pre_MD" organize_files( - [f"{prefix}{protein_name}" for prefix in source_pre_md_files], destination_pre_md + [f"{prefix}{protein_name}" for prefix in source_pre_md_files], + destination_pre_md, ) # Organize topology files after minimization and equilibration @@ -124,9 +125,7 @@ def post_md_file_movement( ) # Organize simulation output files - organize_files( - [f"output_{protein_name}", "trajectory.dcd"], "MD_Files/MD_Output" - ) + organize_files([f"output_{protein_name}", "trajectory.dcd"], "MD_Files/MD_Output") # Organize MDtraj and MDAnalysis files organize_files( diff --git a/openmmdl/openmmdl_simulation/scripts/forcefield_water.py b/openmmdl/openmmdl_simulation/scripts/forcefield_water.py index 5acc18df..7ca74085 100644 --- a/openmmdl/openmmdl_simulation/scripts/forcefield_water.py +++ b/openmmdl/openmmdl_simulation/scripts/forcefield_water.py @@ -14,12 +14,12 @@ def ff_selection(ff): str: Selected XML forcefield file. """ forcefield_dict = { - 'AMBER14': 'amber14-all.xml', - 'AMBER99SB': 'amber99sb.xml', - 'AMBER99SB-ILDN': 'amber99sbildn.xml', - 'AMBER03': 'amber03.xml', - 'AMBER10': 'amber10.xml', - 'CHARMM36': 'charmm36.xml', + "AMBER14": "amber14-all.xml", + "AMBER99SB": "amber99sb.xml", + "AMBER99SB-ILDN": "amber99sbildn.xml", + "AMBER03": "amber03.xml", + "AMBER10": "amber10.xml", + "CHARMM36": "charmm36.xml", } return forcefield_dict.get(ff, None) @@ -36,39 +36,39 @@ def water_forcefield_selection(water, forcefield_selection): Returns: str: The XML filename of the water forcefield. 
""" - old_amber = {'amber99sb.xml', 'amber99sbildn.xml', 'amber03.xml', 'amber10.xml'} + old_amber = {"amber99sb.xml", "amber99sbildn.xml", "amber03.xml", "amber10.xml"} # Define a dictionary to map water models water_model_mapping = { - 'TIP3P': 'tip3p.xml', - 'TIP3P-FB': 'tip3pfb.xml', - 'SPC/E': 'spce.xml', - 'TIP4P-Ew': 'tip4pew.xml', - 'TIP4P-FB': 'tip4pfb.xml', - 'TIP5P': 'tip5p.xml' + "TIP3P": "tip3p.xml", + "TIP3P-FB": "tip3pfb.xml", + "SPC/E": "spce.xml", + "TIP4P-Ew": "tip4pew.xml", + "TIP4P-FB": "tip4pfb.xml", + "TIP5P": "tip5p.xml", } if forcefield_selection in old_amber: water_model = water_model_mapping.get(water, None) else: water_forcefields = { - 'amber14-all.xml': { - 'TIP3P': 'amber14/tip3p.xml', - 'TIP3P-FB': 'amber14/tip3pfb.xml', - 'SPC/E': 'amber14/spce.xml', - 'TIP4P-Ew': 'amber14/tip4pew.xml', - 'TIP4P-FB': 'amber14/tip4pfb.xml', + "amber14-all.xml": { + "TIP3P": "amber14/tip3p.xml", + "TIP3P-FB": "amber14/tip3pfb.xml", + "SPC/E": "amber14/spce.xml", + "TIP4P-Ew": "amber14/tip4pew.xml", + "TIP4P-FB": "amber14/tip4pfb.xml", + }, + "charmm36.xml": { + "CHARMM default": "charmm36/water.xml", + "TIP3P-PME-B": "charmm36/tip3p-pme-b.xml", + "TIP3P-PME-F": "charmm36/tip3p-pme-f.xml", + "SPC/E": "charmm36/spce.xml", + "TIP4P-Ew": "charmm36/tip4pew.xml", + "TIP4P-2005": "charmm36/tip4p2005.xml", + "TIP5P": "charmm36/tip5p.xml", + "TIP5P-Ew": "charmm36/tip5pew.xml", }, - 'charmm36.xml': { - 'CHARMM default': 'charmm36/water.xml', - 'TIP3P-PME-B': 'charmm36/tip3p-pme-b.xml', - 'TIP3P-PME-F': 'charmm36/tip3p-pme-f.xml', - 'SPC/E': 'charmm36/spce.xml', - 'TIP4P-Ew': 'charmm36/tip4pew.xml', - 'TIP4P-2005': 'charmm36/tip4p2005.xml', - 'TIP5P': 'charmm36/tip5p.xml', - 'TIP5P-Ew': 'charmm36/tip5pew.xml', - } } water_model = water_forcefields.get(forcefield_selection, {}).get(water, None) @@ -86,32 +86,32 @@ def water_model_selection(water, forcefield_selection): Returns: str: Water model forcefield XML file. 
""" - old_amber = {'amber99sb.xml', 'amber99sbildn.xml', 'amber03.xml', 'amber10.xml'} + old_amber = {"amber99sb.xml", "amber99sbildn.xml", "amber03.xml", "amber10.xml"} water_model_mapping = { - 'TIP3P': 'tip3p', - 'TIP3P-FB': 'tip3pfb', - 'SPC/E': 'spce', - 'TIP4P-Ew': 'tip4pew', - 'TIP4P-FB': 'tip4pfb', + "TIP3P": "tip3p", + "TIP3P-FB": "tip3pfb", + "SPC/E": "spce", + "TIP4P-Ew": "tip4pew", + "TIP4P-FB": "tip4pfb", } if forcefield_selection in old_amber: water_model = water_model_mapping.get(water) - elif forcefield_selection == 'amber14-all.xml': - if water == 'TIP5P': + elif forcefield_selection == "amber14-all.xml": + if water == "TIP5P": return None # 'TIP5P' is not available in 'amber14-all.xml' water_model = water_model_mapping.get(water) - elif forcefield_selection == 'charmm36.xml': + elif forcefield_selection == "charmm36.xml": charmm_water_mapping = { - 'CHARMM default': 'charmm', - 'TIP3P-PME-B': 'charmm', - 'TIP3P-PME-F': 'charmm', - 'SPC/E': 'charmm', - 'TIP4P-Ew': 'tip4pew', - 'TIP4P-2005': 'tip4pew', - 'TIP5P': 'tip5p', - 'TIP5P-Ew': 'tip5p', + "CHARMM default": "charmm", + "TIP3P-PME-B": "charmm", + "TIP3P-PME-F": "charmm", + "SPC/E": "charmm", + "TIP4P-Ew": "tip4pew", + "TIP4P-2005": "tip4pew", + "TIP5P": "tip5p", + "TIP5P-Ew": "tip5p", } water_model = charmm_water_mapping.get(water) else: @@ -133,12 +133,12 @@ def generate_forcefield(protein_ff, solvent_ff, add_membrane, rdkit_mol=None): Returns: simtk.openmm.app.Forcefield: Forcefield with a registered small molecule. 
""" - old_amber = {'amber99sb.xml', 'amber99sbildn.xml', 'amber03.xml', 'amber10.xml'} + old_amber = {"amber99sb.xml", "amber99sbildn.xml", "amber03.xml", "amber10.xml"} # For older amber forcefields, the additional lipid17.xml is required for templates if add_membrane == True: if protein_ff in old_amber: - forcefield = app.ForceField(protein_ff, solvent_ff, 'amber14/lipid17.xml') + forcefield = app.ForceField(protein_ff, solvent_ff, "amber14/lipid17.xml") else: forcefield = app.ForceField(protein_ff, solvent_ff) else: @@ -148,14 +148,16 @@ def generate_forcefield(protein_ff, solvent_ff, add_membrane, rdkit_mol=None): if rdkit_mol is not None: gaff = GAFFTemplateGenerator( molecules=Molecule.from_rdkit(rdkit_mol, allow_undefined_stereo=True), - forcefield='gaff-2.11' + forcefield="gaff-2.11", ) forcefield.registerTemplateGenerator(gaff.generator) return forcefield -def generate_transitional_forcefield(protein_ff, solvent_ff, add_membrane, rdkit_mol=None): +def generate_transitional_forcefield( + protein_ff, solvent_ff, add_membrane, rdkit_mol=None +): """ Generate an OpenMM transitional forcefield object with TIP3P water model for membrane building and register a small molecule. @@ -168,14 +170,16 @@ def generate_transitional_forcefield(protein_ff, solvent_ff, add_membrane, rdkit Returns: simtk.openmm.app.Forcefield: A transitional forcefield with TIP3P water and a registered small molecule. 
""" - old_amber = {'amber99sb.xml', 'amber99sbildn.xml', 'amber03.xml', 'amber10.xml'} + old_amber = {"amber99sb.xml", "amber99sbildn.xml", "amber03.xml", "amber10.xml"} # For older amber forcefields, the additional lipid17.xml is required for templates if add_membrane == True: if protein_ff in old_amber: - transitional_forcefield = app.ForceField(protein_ff, 'tip3p.xml', 'amber14/lipid17.xml') + transitional_forcefield = app.ForceField( + protein_ff, "tip3p.xml", "amber14/lipid17.xml" + ) else: - transitional_forcefield = app.ForceField(protein_ff, 'amber14/tip3p.xml') + transitional_forcefield = app.ForceField(protein_ff, "amber14/tip3p.xml") else: transitional_forcefield = app.ForceField(protein_ff, solvent_ff) @@ -183,7 +187,7 @@ def generate_transitional_forcefield(protein_ff, solvent_ff, add_membrane, rdkit if rdkit_mol is not None: gaff = GAFFTemplateGenerator( molecules=Molecule.from_rdkit(rdkit_mol, allow_undefined_stereo=True), - forcefield='gaff-2.11' + forcefield="gaff-2.11", ) transitional_forcefield.registerTemplateGenerator(gaff.generator) diff --git a/openmmdl/openmmdl_simulation/scripts/post_md_conversions.py b/openmmdl/openmmdl_simulation/scripts/post_md_conversions.py index 49c7df0a..453b32d5 100644 --- a/openmmdl/openmmdl_simulation/scripts/post_md_conversions.py +++ b/openmmdl/openmmdl_simulation/scripts/post_md_conversions.py @@ -17,14 +17,14 @@ def mdtraj_conversion(pdb_file, mdtraj_output): mdtraj_frames = md.load_dcd("trajectory.dcd", top=pdb_file) mdtraj_frames.image_molecules() if "dcd" in mdtraj_output: - mdtraj_frames.save_dcd(f'centered_old_coordinates.dcd') + mdtraj_frames.save_dcd(f"centered_old_coordinates.dcd") if "xtc" in mdtraj_output: - mdtraj_frames.save_xtc(f'centered_old_coordinates.xtc') + mdtraj_frames.save_xtc(f"centered_old_coordinates.xtc") mdtraj_first_frame = mdtraj_frames[0:1] if "pdb" in mdtraj_output: - mdtraj_first_frame.save_pdb(f'centered_old_coordinates_top.pdb') + 
mdtraj_first_frame.save_pdb(f"centered_old_coordinates_top.pdb") if "gro" in mdtraj_output: - mdtraj_first_frame.save_gro(f'centered_old_coordinates_top.gro') + mdtraj_first_frame.save_gro(f"centered_old_coordinates_top.gro") def MDanalysis_conversion( @@ -55,79 +55,89 @@ def MDanalysis_conversion( topology_trajectory_all_atoms.center_of_mass() ) topology_trajectory_protein_ligand = topology_trajectory.select_atoms( - f'protein or resname {ligand_name} or resname {special_ligname}' + f"protein or resname {ligand_name} or resname {special_ligname}" ) if "pdb" in mda_output: if output_selection != "mda_prot_lig": - with mda.Writer(f'centered_traj_unaligned.dcd', topology_trajectory_all_atoms.n_atoms) as w: + with mda.Writer( + f"centered_traj_unaligned.dcd", topology_trajectory_all_atoms.n_atoms + ) as w: for ts in topology_trajectory.trajectory[1:]: w.write(topology_trajectory_all_atoms) - topology_trajectory_all_atoms.write(f'centered_top.pdb') - topology_ref_all_pdb = mda.Universe(f'centered_top.pdb') + topology_trajectory_all_atoms.write(f"centered_top.pdb") + topology_ref_all_pdb = mda.Universe(f"centered_top.pdb") mobile_all_pdb = mda.Universe( - f'centered_top.pdb', f'centered_traj_unaligned.dcd' + f"centered_top.pdb", f"centered_traj_unaligned.dcd" ) alignment_all_pdb = align.AlignTraj( mobile_all_pdb, topology_ref_all_pdb, select="protein and name CA", weights="mass", - filename=f'centered_traj.dcd', + filename=f"centered_traj.dcd", ) alignment_all_pdb.run() if output_selection != "mda_all": - with mda.Writer(f'prot_lig_traj_unaligned.dcd', topology_trajectory_protein_ligand.n_atoms) as w: + with mda.Writer( + f"prot_lig_traj_unaligned.dcd", + topology_trajectory_protein_ligand.n_atoms, + ) as w: for ts in topology_trajectory.trajectory[1:]: w.write(topology_trajectory_protein_ligand) - topology_trajectory_protein_ligand.write(f'prot_lig_top.pdb') - topology_ref_prot_lig_pdb = mda.Universe(f'prot_lig_top.pdb') + 
topology_trajectory_protein_ligand.write(f"prot_lig_top.pdb") + topology_ref_prot_lig_pdb = mda.Universe(f"prot_lig_top.pdb") mobile_prot_lig_pdb = mda.Universe( - f'prot_lig_top.pdb', f'prot_lig_traj_unaligned.dcd' + f"prot_lig_top.pdb", f"prot_lig_traj_unaligned.dcd" ) alignment_prot_lig_pdb = align.AlignTraj( mobile_prot_lig_pdb, topology_ref_prot_lig_pdb, select="protein and name CA", weights="mass", - filename=f'prot_lig_traj.dcd', + filename=f"prot_lig_traj.dcd", ) alignment_prot_lig_pdb.run() if "gro" in mda_output: if output_selection != "mda_prot_lig": - with mda.Writer(f'centered_traj_unaligned.xtc', topology_trajectory_all_atoms.n_atoms) as w: + with mda.Writer( + f"centered_traj_unaligned.xtc", topology_trajectory_all_atoms.n_atoms + ) as w: for ts in topology_trajectory.trajectory[1:]: w.write(topology_trajectory_all_atoms) - topology_trajectory_all_atoms.write(f'centered_top.gro') - topology_ref_all_gro = mda.Universe(f'centered_top.gro') + topology_trajectory_all_atoms.write(f"centered_top.gro") + topology_ref_all_gro = mda.Universe(f"centered_top.gro") mobile_all_gro = mda.Universe( - f'centered_top.gro', f'centered_traj_unaligned.xtc' + f"centered_top.gro", f"centered_traj_unaligned.xtc" ) alignment_all_gro = align.AlignTraj( mobile_all_gro, topology_ref_all_gro, select="protein and name CA", weights="mass", - filename=f'centered_traj.xtc', + filename=f"centered_traj.xtc", ) alignment_all_gro.run() if output_selection != "mda_all": - with mda.Writer(f'prot_lig_traj_unaligned.xtc', topology_trajectory_protein_ligand.n_atoms) as w: + with mda.Writer( + f"prot_lig_traj_unaligned.xtc", + topology_trajectory_protein_ligand.n_atoms, + ) as w: for ts in topology_trajectory.trajectory[1:]: w.write(topology_trajectory_protein_ligand) - topology_trajectory_protein_ligand.write(f'prot_lig_top.gro') - topology_ref_prot_lig_gro = mda.Universe(f'prot_lig_top.gro') + topology_trajectory_protein_ligand.write(f"prot_lig_top.gro") + topology_ref_prot_lig_gro = 
mda.Universe(f"prot_lig_top.gro") mobile_prot_lig_gro = mda.Universe( - f'prot_lig_top.gro', f'prot_lig_traj_unaligned.xtc' + f"prot_lig_top.gro", f"prot_lig_traj_unaligned.xtc" ) alignment_prot_lig_gro = align.AlignTraj( mobile_prot_lig_gro, topology_ref_prot_lig_gro, select="protein and name CA", weights="mass", - filename=f'prot_lig_traj.xtc', + filename=f"prot_lig_traj.xtc", ) alignment_prot_lig_gro.run() diff --git a/openmmdl/openmmdl_simulation/scripts/protein_ligand_prep.py b/openmmdl/openmmdl_simulation/scripts/protein_ligand_prep.py index 765ab1ab..aada7369 100644 --- a/openmmdl/openmmdl_simulation/scripts/protein_ligand_prep.py +++ b/openmmdl/openmmdl_simulation/scripts/protein_ligand_prep.py @@ -99,11 +99,15 @@ def merge_protein_and_ligand(protein, ligand): complex_positions (simtk.unit.quantity.Quantity): The merged positions. """ # combine topologies - md_protein_topology = md.Topology.from_openmm(protein.topology) # using mdtraj for protein top + md_protein_topology = md.Topology.from_openmm( + protein.topology + ) # using mdtraj for protein top md_ligand_topology = md.Topology.from_openmm( ligand.topology ) # using mdtraj for ligand top - md_complex_topology = md_protein_topology.join(md_ligand_topology) # add them together + md_complex_topology = md_protein_topology.join( + md_ligand_topology + ) # add them together complex_topology = md_complex_topology.to_openmm() @@ -112,11 +116,13 @@ def merge_protein_and_ligand(protein, ligand): # create an array for storing all atom positions as tupels containing a value and a unit # called OpenMM Quantities - complex_positions = unit.Quantity( - np.zeros([total_atoms, 3]), unit=unit.nanometers - ) - complex_positions[: len(protein.positions)] = protein.positions # add protein positions - complex_positions[len(protein.positions) :] = ligand.positions # add ligand positions + complex_positions = unit.Quantity(np.zeros([total_atoms, 3]), unit=unit.nanometers) + complex_positions[: len(protein.positions)] = ( + 
protein.positions + ) # add protein positions + complex_positions[len(protein.positions) :] = ( + ligand.positions + ) # add ligand positions return complex_topology, complex_positions @@ -153,11 +159,7 @@ def water_padding_solvent_builder( PDBFile.writeFile(protein_pdb.topology, protein_pdb.positions, outfile) # Adds solvent to the selected protein - if ( - model_water == "charmm" - or model_water == "tip3pfb" - or model_water == "tip3" - ): + if model_water == "charmm" or model_water == "tip3pfb" or model_water == "tip3": modeller.addSolvent( forcefield, padding=water_padding_distance * unit.nanometers, @@ -228,23 +230,17 @@ def water_absolute_solvent_builder( PDBFile.writeFile(protein_pdb.topology, protein_pdb.positions, outfile) # Adds solvent to the selected protein - if ( - model_water == "charmm" - or model_water == "tip3pfb" - or model_water == "tip3" - ): + if model_water == "charmm" or model_water == "tip3pfb" or model_water == "tip3": modeller.addSolvent( forcefield, - boxSize=Vec3(water_box_x, water_box_y, water_box_z) - * unit.nanometers, + boxSize=Vec3(water_box_x, water_box_y, water_box_z) * unit.nanometers, positiveIon=water_positive_ion, negativeIon=water_negative_ion, ionicStrength=water_ionicstrength * unit.molar, ) elif model_water == "charmm_tip4pew": protein_pdb.addSolvent( - boxSize=Vec3(water_box_x, water_box_y, water_box_z) - * unit.nanometers, + boxSize=Vec3(water_box_x, water_box_y, water_box_z) * unit.nanometers, positiveIon=water_positive_ion, negativeIon=water_negative_ion, ionicStrength=water_ionicstrength * unit.molar, @@ -255,8 +251,7 @@ def water_absolute_solvent_builder( modeller.addSolvent( forcefield, model=model_water, - boxSize=Vec3(water_box_x, water_box_y, water_box_z) - * unit.nanometers, + boxSize=Vec3(water_box_x, water_box_y, water_box_z) * unit.nanometers, positiveIon=water_positive_ion, negativeIon=water_negative_ion, ionicStrength=water_ionicstrength * unit.molar, @@ -269,6 +264,7 @@ def 
water_absolute_solvent_builder( return modeller + def membrane_builder( ff, model_water, diff --git a/openmmdl/openmmdl_simulation/tutorial_systems/pdb_path/5wyz_solvent/5wyz_simulation.py b/openmmdl/openmmdl_simulation/tutorial_systems/pdb_path/5wyz_solvent/5wyz_simulation.py index f718867d..264e2a32 100644 --- a/openmmdl/openmmdl_simulation/tutorial_systems/pdb_path/5wyz_solvent/5wyz_simulation.py +++ b/openmmdl/openmmdl_simulation/tutorial_systems/pdb_path/5wyz_solvent/5wyz_simulation.py @@ -1,26 +1,59 @@ # This script was generated by OpenMM-MDL Setup on 2022-11-27. -# ,-----. .-------. .-''-. ,---. .--.,---. ,---.,---. ,---. ______ .---. -# .' .-, '. \ _(`)_ \ .'_ _ \ | \ | || \ / || \ / || _ `''. | ,_| -# / ,-.| \ _ \ | (_ o._)| / ( ` ) '| , \ | || , \/ , || , \/ , || _ | ) _ \,-./ ) -# ; \ '_ / | :| (_,_) /. (_ o _) || |\_ \| || |\_ /| || |\_ /| ||( ''_' ) |\ '_ '`) -# | _`,/ \ _/ || '-.-' | (_,_)___|| _( )_\ || _( )_/ | || _( )_/ | || . (_) `. | > (_) ) -# : ( '\_/ \ ;| | ' \ .---.| (_ o _) || (_ o _) | || (_ o _) | ||(_ ._) '( . .-' -# \ `"/ \ ) / | | \ `-' /| (_,_)\ || (_,_) | || (_,_) | || (_.\.' / `-'`-'|___ -# '. \_/``".' / ) \ / | | | || | | || | | || .' | \ -# '-----' `---' `'-..-' '--' '--''--' '--''--' '--''-----'` `--------` - - - -from scripts.forcefield_water import ff_selection, water_forecfield_selection, water_model_selection, generate_forcefield, generate_transitional_forcefield -from scripts.protein_ligand_prep import protein_choice, prepare_ligand, rdkit_to_openmm, merge_protein_and_ligand, water_padding_solvent_builder, water_absolute_solvent_builder, membrane_builder, water_conversion -from scripts.post_md_conversions import mdtraj_conversion, MDanalysis_conversion, rmsd_for_atomgroups, RMSD_dist_frames, atomic_distance -from scripts.cleaning_procedures import cleanup, post_md_file_movement +# ,-----. .-------. .-''-. ,---. .--.,---. ,---.,---. ,---. ______ .---. +# .' .-, '. \ _(`)_ \ .'_ _ \ | \ | || \ / || \ / || _ `''. 
| ,_| +# / ,-.| \ _ \ | (_ o._)| / ( ` ) '| , \ | || , \/ , || , \/ , || _ | ) _ \,-./ ) +# ; \ '_ / | :| (_,_) /. (_ o _) || |\_ \| || |\_ /| || |\_ /| ||( ''_' ) |\ '_ '`) +# | _`,/ \ _/ || '-.-' | (_,_)___|| _( )_\ || _( )_/ | || _( )_/ | || . (_) `. | > (_) ) +# : ( '\_/ \ ;| | ' \ .---.| (_ o _) || (_ o _) | || (_ o _) | ||(_ ._) '( . .-' +# \ `"/ \ ) / | | \ `-' /| (_,_)\ || (_,_) | || (_,_) | || (_.\.' / `-'`-'|___ +# '. \_/``".' / ) \ / | | | || | | || | | || .' | \ +# '-----' `---' `'-..-' '--' '--''--' '--''--' '--''-----'` `--------` + + +from scripts.forcefield_water import ( + ff_selection, + water_forecfield_selection, + water_model_selection, + generate_forcefield, + generate_transitional_forcefield, +) +from scripts.protein_ligand_prep import ( + protein_choice, + prepare_ligand, + rdkit_to_openmm, + merge_protein_and_ligand, + water_padding_solvent_builder, + water_absolute_solvent_builder, + membrane_builder, + water_conversion, +) +from scripts.post_md_conversions import ( + mdtraj_conversion, + MDanalysis_conversion, + rmsd_for_atomgroups, + RMSD_dist_frames, + atomic_distance, +) +from scripts.cleaning_procedures import cleanup, post_md_file_movement import simtk.openmm.app as app -from simtk.openmm.app import PDBFile, Modeller, PDBReporter, StateDataReporter, DCDReporter, CheckpointReporter -from simtk.openmm import unit, Platform, Platform_getPlatformByName, MonteCarloBarostat, LangevinMiddleIntegrator +from simtk.openmm.app import ( + PDBFile, + Modeller, + PDBReporter, + StateDataReporter, + DCDReporter, + CheckpointReporter, +) +from simtk.openmm import ( + unit, + Platform, + Platform_getPlatformByName, + MonteCarloBarostat, + LangevinMiddleIntegrator, +) from simtk.openmm import Vec3 import simtk.openmm as mm import sys @@ -29,13 +62,13 @@ # Input Files ############# Ligand and Protein Data ################### -######## Add the Ligand SDf File and Protein PDB File in the Folder with the Script ######### +######## Add the Ligand SDf File 
and Protein PDB File in the Folder with the Script ######### ligand_select = "yes" ligand_name = "UNK" ligand_sdf = "5VF.sdf" -minimize = False +minimize = False protein = "5wyz-moe-processed_openMMDL.pdb" ############# Ligand and Protein Preparation ################### @@ -44,18 +77,18 @@ ############# Forcefield, Water and Membrane Model Selection ################### -ff = 'AMBER14' -water = 'TIP3P' +ff = "AMBER14" +water = "TIP3P" ############# Water Box Settings ################### add_membrane = False Water_Box = "Buffer" water_padding_distance = 1.0 -water_boxShape = 'cube' +water_boxShape = "cube" water_ionicstrength = 0.15 -water_positive_ion = 'Na+' -water_negative_ion = 'Cl-' +water_positive_ion = "Na+" +water_negative_ion = "Cl-" ############# Post MD Processing ################### @@ -65,85 +98,149 @@ # System Configuration nonbondedMethod = app.PME -nonbondedCutoff = 1.0*unit.nanometers +nonbondedCutoff = 1.0 * unit.nanometers ewaldErrorTolerance = 0.0005 constraints = app.HBonds rigidWater = True constraintTolerance = 0.000001 -hydrogenMass = 1.5*unit.amu +hydrogenMass = 1.5 * unit.amu # Integration Options -dt = 0.002*unit.picoseconds -temperature = 300*unit.kelvin -friction = 1.0/unit.picosecond -pressure = 1.0*unit.atmospheres +dt = 0.002 * unit.picoseconds +temperature = 300 * unit.kelvin +friction = 1.0 / unit.picosecond +pressure = 1.0 * unit.atmospheres barostatInterval = 25 # Simulation Options steps = 5000000 equilibrationSteps = 1000 -platform = Platform.getPlatformByName('CUDA') -platformProperties = {'Precision': 'single'} -dcdReporter = DCDReporter('trajectory.dcd', 50000) -dataReporter = StateDataReporter('log.txt', 1000, totalSteps=steps, - step=True, speed=True, progress=True, potentialEnergy=True, temperature=True, separator='\t') -checkpointReporter = CheckpointReporter('checkpoint.chk', 10000) -checkpointReporter10 = CheckpointReporter('10x_checkpoint.chk', 100000) -checkpointReporter100 = CheckpointReporter('100x_checkpoint.chk', 
1000000) - -if ligand_select == 'yes': - +platform = Platform.getPlatformByName("CUDA") +platformProperties = {"Precision": "single"} +dcdReporter = DCDReporter("trajectory.dcd", 50000) +dataReporter = StateDataReporter( + "log.txt", + 1000, + totalSteps=steps, + step=True, + speed=True, + progress=True, + potentialEnergy=True, + temperature=True, + separator="\t", +) +checkpointReporter = CheckpointReporter("checkpoint.chk", 10000) +checkpointReporter10 = CheckpointReporter("10x_checkpoint.chk", 100000) +checkpointReporter100 = CheckpointReporter("100x_checkpoint.chk", 1000000) + +if ligand_select == "yes": + print("Preparing MD Simulation with ligand") - - ligand_prepared = prepare_ligand(ligand_sdf,minimize_molecule=minimize) - - omm_ligand = rdkit_to_openmm(ligand_prepared, ligand_name) -protein_pdb = protein_choice(protein_is_prepared=protein_prepared,protein=protein) + ligand_prepared = prepare_ligand(ligand_sdf, minimize_molecule=minimize) + + omm_ligand = rdkit_to_openmm(ligand_prepared, ligand_name) + +protein_pdb = protein_choice(protein_is_prepared=protein_prepared, protein=protein) forcefield_selected = ff_selection(ff) -water_selected = water_forecfield_selection(water=water,forcefield_selection=ff_selection(ff)) -model_water = water_model_selection(water=water,forcefield_selection=ff_selection(ff)) -print("Forcefield and Water Model Selected") +water_selected = water_forecfield_selection( + water=water, forcefield_selection=ff_selection(ff) +) +model_water = water_model_selection(water=water, forcefield_selection=ff_selection(ff)) +print("Forcefield and Water Model Selected") + +if ligand_select == "yes": -if ligand_select == 'yes': - if add_membrane == True: - transitional_forcefield = generate_transitional_forcefield(protein_ff=forcefield_selected, solvent_ff=water_selected, add_membrane=add_membrane, rdkit_mol=ligand_prepared) - - forcefield = generate_forcefield(protein_ff=forcefield_selected, solvent_ff=water_selected, add_membrane=add_membrane, 
rdkit_mol=ligand_prepared) - - complex_topology, complex_positions = merge_protein_and_ligand(protein_pdb, omm_ligand) - - print("Complex topology has", complex_topology.getNumAtoms(), "atoms.") + transitional_forcefield = generate_transitional_forcefield( + protein_ff=forcefield_selected, + solvent_ff=water_selected, + add_membrane=add_membrane, + rdkit_mol=ligand_prepared, + ) + + forcefield = generate_forcefield( + protein_ff=forcefield_selected, + solvent_ff=water_selected, + add_membrane=add_membrane, + rdkit_mol=ligand_prepared, + ) + + complex_topology, complex_positions = merge_protein_and_ligand( + protein_pdb, omm_ligand + ) + + print("Complex topology has", complex_topology.getNumAtoms(), "atoms.") modeller = app.Modeller(complex_topology, complex_positions) - + if add_membrane == True: - membrane_builder(ff, model_water, forcefield, transitional_forcefield, protein_pdb, modeller, membrane_lipid_type, membrane_padding, membrane_positive_ion, membrane_negative_ion, membrane_ionicstrength, protein) + membrane_builder( + ff, + model_water, + forcefield, + transitional_forcefield, + protein_pdb, + modeller, + membrane_lipid_type, + membrane_padding, + membrane_positive_ion, + membrane_negative_ion, + membrane_ionicstrength, + protein, + ) elif add_membrane == False: if Water_Box == "Buffer": - water_padding_solvent_builder(model_water, forcefield, water_padding_distance, protein_pdb, modeller, water_positive_ion, water_negative_ion, water_ionicstrength, protein) + water_padding_solvent_builder( + model_water, + forcefield, + water_padding_distance, + protein_pdb, + modeller, + water_positive_ion, + water_negative_ion, + water_ionicstrength, + protein, + ) elif Water_Box == "Absolute": - water_absolute_solvent_builder(model_water, forcefield, water_box_x, water_box_y, water_box_z, protein_pdb, modeller, water_positive_ion, water_negative_ion, water_ionicstrength, protein) - + water_absolute_solvent_builder( + model_water, + forcefield, + water_box_x, + 
water_box_y, + water_box_z, + protein_pdb, + modeller, + water_positive_ion, + water_negative_ion, + water_ionicstrength, + protein, + ) + if add_membrane == True: - if model_water == 'tip4pew' or model_water == 'tip5p': + if model_water == "tip4pew" or model_water == "tip5p": water_conversion(model_water, modeller, protein) topology = modeller.topology positions = modeller.positions - # Prepare the Simulation -print('Building system...') -system = forcefield.createSystem(topology, nonbondedMethod=nonbondedMethod, nonbondedCutoff=nonbondedCutoff, - constraints=constraints, rigidWater=rigidWater, ewaldErrorTolerance=ewaldErrorTolerance, hydrogenMass=hydrogenMass) +print("Building system...") +system = forcefield.createSystem( + topology, + nonbondedMethod=nonbondedMethod, + nonbondedCutoff=nonbondedCutoff, + constraints=constraints, + rigidWater=rigidWater, + ewaldErrorTolerance=ewaldErrorTolerance, + hydrogenMass=hydrogenMass, +) system.addForce(MonteCarloBarostat(pressure, temperature, barostatInterval)) integrator = LangevinMiddleIntegrator(temperature, friction, dt) integrator.setConstraintTolerance(constraintTolerance) @@ -152,34 +249,45 @@ # Minimize and Equilibrate -print('Performing energy minimization...') +print("Performing energy minimization...") simulation.minimizeEnergy() -with open(f'Energyminimization_{protein}', 'w') as outfile: +with open(f"Energyminimization_{protein}", "w") as outfile: PDBFile.writeFile(modeller.topology, modeller.positions, outfile) - -print('Equilibrating...') + +print("Equilibrating...") simulation.context.setVelocitiesToTemperature(temperature) simulation.step(equilibrationSteps) -with open(f'Equilibration_{protein}', 'w') as outfile: +with open(f"Equilibration_{protein}", "w") as outfile: PDBFile.writeFile(modeller.topology, modeller.positions, outfile) - + # Simulate -print('Simulating...') -simulation.reporters.append(PDBReporter(f'output_{protein}', 5000000)) +print("Simulating...") 
+simulation.reporters.append(PDBReporter(f"output_{protein}", 5000000)) simulation.reporters.append(dcdReporter) simulation.reporters.append(dataReporter) simulation.reporters.append(checkpointReporter) simulation.reporters.append(checkpointReporter10) simulation.reporters.append(checkpointReporter100) -simulation.reporters.append(StateDataReporter(sys.stdout, 1000, step=True, potentialEnergy=True, temperature=True)) +simulation.reporters.append( + StateDataReporter( + sys.stdout, 1000, step=True, potentialEnergy=True, temperature=True + ) +) simulation.currentStep = 0 simulation.step(steps) -mdtraj_conversion(f'Equilibration_{protein}') -MDanalysis_conversion(f'centered_old_coordinates.pdb', f'centered_old_coordinates.dcd', ligand_name='UNK') -rmsd_for_atomgroups(f'prot_lig_top.pdb', f'prot_lig_traj.dcd', selection1='backbone', selection2=['protein', 'resname UNK']) -RMSD_dist_frames(f'prot_lig_top.pdb', f'prot_lig_traj.dcd', lig='UNK') -post_md_file_movement(protein,ligand_sdf) \ No newline at end of file +mdtraj_conversion(f"Equilibration_{protein}") +MDanalysis_conversion( + f"centered_old_coordinates.pdb", f"centered_old_coordinates.dcd", ligand_name="UNK" +) +rmsd_for_atomgroups( + f"prot_lig_top.pdb", + f"prot_lig_traj.dcd", + selection1="backbone", + selection2=["protein", "resname UNK"], +) +RMSD_dist_frames(f"prot_lig_top.pdb", f"prot_lig_traj.dcd", lig="UNK") +post_md_file_movement(protein, ligand_sdf) diff --git a/openmmdl/openmmdl_simulation/tutorial_systems/pdb_path/6b73_membrane/6b73_simulation.py b/openmmdl/openmmdl_simulation/tutorial_systems/pdb_path/6b73_membrane/6b73_simulation.py index 8e6f830d..36c7dd2b 100644 --- a/openmmdl/openmmdl_simulation/tutorial_systems/pdb_path/6b73_membrane/6b73_simulation.py +++ b/openmmdl/openmmdl_simulation/tutorial_systems/pdb_path/6b73_membrane/6b73_simulation.py @@ -1,26 +1,59 @@ # This script was generated by OpenMM-MDL Setup on 2022-11-27. -# ,-----. .-------. .-''-. ,---. .--.,---. ,---.,---. ,---. 
______ .---. -# .' .-, '. \ _(`)_ \ .'_ _ \ | \ | || \ / || \ / || _ `''. | ,_| -# / ,-.| \ _ \ | (_ o._)| / ( ` ) '| , \ | || , \/ , || , \/ , || _ | ) _ \,-./ ) -# ; \ '_ / | :| (_,_) /. (_ o _) || |\_ \| || |\_ /| || |\_ /| ||( ''_' ) |\ '_ '`) -# | _`,/ \ _/ || '-.-' | (_,_)___|| _( )_\ || _( )_/ | || _( )_/ | || . (_) `. | > (_) ) -# : ( '\_/ \ ;| | ' \ .---.| (_ o _) || (_ o _) | || (_ o _) | ||(_ ._) '( . .-' -# \ `"/ \ ) / | | \ `-' /| (_,_)\ || (_,_) | || (_,_) | || (_.\.' / `-'`-'|___ -# '. \_/``".' / ) \ / | | | || | | || | | || .' | \ -# '-----' `---' `'-..-' '--' '--''--' '--''--' '--''-----'` `--------` - - - -from scripts.forcefield_water import ff_selection, water_forecfield_selection, water_model_selection, generate_forcefield, generate_transitional_forcefield -from scripts.protein_ligand_prep import protein_choice, prepare_ligand, rdkit_to_openmm, merge_protein_and_ligand, water_padding_solvent_builder, water_absolute_solvent_builder, membrane_builder, water_conversion -from scripts.post_md_conversions import mdtraj_conversion, MDanalysis_conversion, rmsd_for_atomgroups, RMSD_dist_frames, atomic_distance -from scripts.cleaning_procedures import cleanup, post_md_file_movement +# ,-----. .-------. .-''-. ,---. .--.,---. ,---.,---. ,---. ______ .---. +# .' .-, '. \ _(`)_ \ .'_ _ \ | \ | || \ / || \ / || _ `''. | ,_| +# / ,-.| \ _ \ | (_ o._)| / ( ` ) '| , \ | || , \/ , || , \/ , || _ | ) _ \,-./ ) +# ; \ '_ / | :| (_,_) /. (_ o _) || |\_ \| || |\_ /| || |\_ /| ||( ''_' ) |\ '_ '`) +# | _`,/ \ _/ || '-.-' | (_,_)___|| _( )_\ || _( )_/ | || _( )_/ | || . (_) `. | > (_) ) +# : ( '\_/ \ ;| | ' \ .---.| (_ o _) || (_ o _) | || (_ o _) | ||(_ ._) '( . .-' +# \ `"/ \ ) / | | \ `-' /| (_,_)\ || (_,_) | || (_,_) | || (_.\.' / `-'`-'|___ +# '. \_/``".' / ) \ / | | | || | | || | | || .' 
| \ +# '-----' `---' `'-..-' '--' '--''--' '--''--' '--''-----'` `--------` + + +from scripts.forcefield_water import ( + ff_selection, + water_forecfield_selection, + water_model_selection, + generate_forcefield, + generate_transitional_forcefield, +) +from scripts.protein_ligand_prep import ( + protein_choice, + prepare_ligand, + rdkit_to_openmm, + merge_protein_and_ligand, + water_padding_solvent_builder, + water_absolute_solvent_builder, + membrane_builder, + water_conversion, +) +from scripts.post_md_conversions import ( + mdtraj_conversion, + MDanalysis_conversion, + rmsd_for_atomgroups, + RMSD_dist_frames, + atomic_distance, +) +from scripts.cleaning_procedures import cleanup, post_md_file_movement import simtk.openmm.app as app -from simtk.openmm.app import PDBFile, Modeller, PDBReporter, StateDataReporter, DCDReporter, CheckpointReporter -from simtk.openmm import unit, Platform, Platform_getPlatformByName, MonteCarloBarostat, LangevinMiddleIntegrator +from simtk.openmm.app import ( + PDBFile, + Modeller, + PDBReporter, + StateDataReporter, + DCDReporter, + CheckpointReporter, +) +from simtk.openmm import ( + unit, + Platform, + Platform_getPlatformByName, + MonteCarloBarostat, + LangevinMiddleIntegrator, +) from simtk.openmm import Vec3 import simtk.openmm as mm import sys @@ -29,13 +62,13 @@ # Input Files ############# Ligand and Protein Data ################### -######## Add the Ligand SDf File and Protein PDB File in the Folder with the Script ######### +######## Add the Ligand SDf File and Protein PDB File in the Folder with the Script ######### ligand_select = "yes" ligand_name = "UNK" ligand_sdf = "6b73_lig.sdf" -minimize = False +minimize = False protein = "6b73-moe-processed_openMMDL.pdb" ############# Ligand and Protein Preparation ################### @@ -44,17 +77,17 @@ ############# Forcefield, Water and Membrane Model Selection ################### -ff = 'AMBER14' -water = 'TIP3P-FB' +ff = "AMBER14" +water = "TIP3P-FB" ############# Membrane 
Settings ################### add_membrane = True -membrane_lipid_type = 'POPC' +membrane_lipid_type = "POPC" membrane_padding = 1.0 membrane_ionicstrength = 0.15 -membrane_positive_ion = 'Na+' -membrane_negative_ion = 'Cl-' +membrane_positive_ion = "Na+" +membrane_negative_ion = "Cl-" ############# Post MD Processing ################### @@ -64,85 +97,149 @@ # System Configuration nonbondedMethod = app.PME -nonbondedCutoff = 1.0*unit.nanometers +nonbondedCutoff = 1.0 * unit.nanometers ewaldErrorTolerance = 0.0005 constraints = app.HBonds rigidWater = True constraintTolerance = 0.000001 -hydrogenMass = 1.5*unit.amu +hydrogenMass = 1.5 * unit.amu # Integration Options -dt = 0.004*unit.picoseconds -temperature = 300*unit.kelvin -friction = 1.0/unit.picosecond -pressure = 1.0*unit.atmospheres +dt = 0.004 * unit.picoseconds +temperature = 300 * unit.kelvin +friction = 1.0 / unit.picosecond +pressure = 1.0 * unit.atmospheres barostatInterval = 25 # Simulation Options steps = 1250000 equilibrationSteps = 1000 -platform = Platform.getPlatformByName('CUDA') -platformProperties = {'Precision': 'single'} -dcdReporter = DCDReporter('trajectory.dcd', 12500) -dataReporter = StateDataReporter('log.txt', 1000, totalSteps=steps, - step=True, speed=True, progress=True, potentialEnergy=True, temperature=True, separator='\t') -checkpointReporter = CheckpointReporter('checkpoint.chk', 10000) -checkpointReporter10 = CheckpointReporter('10x_checkpoint.chk', 100000) -checkpointReporter100 = CheckpointReporter('100x_checkpoint.chk', 1000000) - -if ligand_select == 'yes': - +platform = Platform.getPlatformByName("CUDA") +platformProperties = {"Precision": "single"} +dcdReporter = DCDReporter("trajectory.dcd", 12500) +dataReporter = StateDataReporter( + "log.txt", + 1000, + totalSteps=steps, + step=True, + speed=True, + progress=True, + potentialEnergy=True, + temperature=True, + separator="\t", +) +checkpointReporter = CheckpointReporter("checkpoint.chk", 10000) +checkpointReporter10 = 
CheckpointReporter("10x_checkpoint.chk", 100000) +checkpointReporter100 = CheckpointReporter("100x_checkpoint.chk", 1000000) + +if ligand_select == "yes": + print("Preparing MD Simulation with ligand") - - ligand_prepared = prepare_ligand(ligand_sdf,minimize_molecule=minimize) - - omm_ligand = rdkit_to_openmm(ligand_prepared, ligand_name) -protein_pdb = protein_choice(protein_is_prepared=protein_prepared,protein=protein) + ligand_prepared = prepare_ligand(ligand_sdf, minimize_molecule=minimize) + + omm_ligand = rdkit_to_openmm(ligand_prepared, ligand_name) + +protein_pdb = protein_choice(protein_is_prepared=protein_prepared, protein=protein) forcefield_selected = ff_selection(ff) -water_selected = water_forecfield_selection(water=water,forcefield_selection=ff_selection(ff)) -model_water = water_model_selection(water=water,forcefield_selection=ff_selection(ff)) -print("Forcefield and Water Model Selected") +water_selected = water_forecfield_selection( + water=water, forcefield_selection=ff_selection(ff) +) +model_water = water_model_selection(water=water, forcefield_selection=ff_selection(ff)) +print("Forcefield and Water Model Selected") + +if ligand_select == "yes": -if ligand_select == 'yes': - if add_membrane == True: - transitional_forcefield = generate_transitional_forcefield(protein_ff=forcefield_selected, solvent_ff=water_selected, add_membrane=add_membrane, rdkit_mol=ligand_prepared) - - forcefield = generate_forcefield(protein_ff=forcefield_selected, solvent_ff=water_selected, add_membrane=add_membrane, rdkit_mol=ligand_prepared) - - complex_topology, complex_positions = merge_protein_and_ligand(protein_pdb, omm_ligand) - - print("Complex topology has", complex_topology.getNumAtoms(), "atoms.") + transitional_forcefield = generate_transitional_forcefield( + protein_ff=forcefield_selected, + solvent_ff=water_selected, + add_membrane=add_membrane, + rdkit_mol=ligand_prepared, + ) + + forcefield = generate_forcefield( + protein_ff=forcefield_selected, + 
solvent_ff=water_selected, + add_membrane=add_membrane, + rdkit_mol=ligand_prepared, + ) + + complex_topology, complex_positions = merge_protein_and_ligand( + protein_pdb, omm_ligand + ) + + print("Complex topology has", complex_topology.getNumAtoms(), "atoms.") modeller = app.Modeller(complex_topology, complex_positions) - + if add_membrane == True: - membrane_builder(ff, model_water, forcefield, transitional_forcefield, protein_pdb, modeller, membrane_lipid_type, membrane_padding, membrane_positive_ion, membrane_negative_ion, membrane_ionicstrength, protein) + membrane_builder( + ff, + model_water, + forcefield, + transitional_forcefield, + protein_pdb, + modeller, + membrane_lipid_type, + membrane_padding, + membrane_positive_ion, + membrane_negative_ion, + membrane_ionicstrength, + protein, + ) elif add_membrane == False: if Water_Box == "Buffer": - water_padding_solvent_builder(model_water, forcefield, water_padding_distance, protein_pdb, modeller, water_positive_ion, water_negative_ion, water_ionicstrength, protein) + water_padding_solvent_builder( + model_water, + forcefield, + water_padding_distance, + protein_pdb, + modeller, + water_positive_ion, + water_negative_ion, + water_ionicstrength, + protein, + ) elif Water_Box == "Absolute": - water_absolute_solvent_builder(model_water, forcefield, water_box_x, water_box_y, water_box_z, protein_pdb, modeller, water_positive_ion, water_negative_ion, water_ionicstrength, protein) - + water_absolute_solvent_builder( + model_water, + forcefield, + water_box_x, + water_box_y, + water_box_z, + protein_pdb, + modeller, + water_positive_ion, + water_negative_ion, + water_ionicstrength, + protein, + ) + if add_membrane == True: - if model_water == 'tip4pew' or model_water == 'tip5p': + if model_water == "tip4pew" or model_water == "tip5p": water_conversion(model_water, modeller, protein) topology = modeller.topology positions = modeller.positions - # Prepare the Simulation -print('Building system...') -system = 
forcefield.createSystem(topology, nonbondedMethod=nonbondedMethod, nonbondedCutoff=nonbondedCutoff, - constraints=constraints, rigidWater=rigidWater, ewaldErrorTolerance=ewaldErrorTolerance, hydrogenMass=hydrogenMass) +print("Building system...") +system = forcefield.createSystem( + topology, + nonbondedMethod=nonbondedMethod, + nonbondedCutoff=nonbondedCutoff, + constraints=constraints, + rigidWater=rigidWater, + ewaldErrorTolerance=ewaldErrorTolerance, + hydrogenMass=hydrogenMass, +) system.addForce(MonteCarloBarostat(pressure, temperature, barostatInterval)) integrator = LangevinMiddleIntegrator(temperature, friction, dt) integrator.setConstraintTolerance(constraintTolerance) @@ -151,34 +248,45 @@ # Minimize and Equilibrate -print('Performing energy minimization...') +print("Performing energy minimization...") simulation.minimizeEnergy() -with open(f'Energyminimization_{protein}', 'w') as outfile: +with open(f"Energyminimization_{protein}", "w") as outfile: PDBFile.writeFile(modeller.topology, modeller.positions, outfile) - -print('Equilibrating...') + +print("Equilibrating...") simulation.context.setVelocitiesToTemperature(temperature) simulation.step(equilibrationSteps) -with open(f'Equilibration_{protein}', 'w') as outfile: +with open(f"Equilibration_{protein}", "w") as outfile: PDBFile.writeFile(modeller.topology, modeller.positions, outfile) - + # Simulate -print('Simulating...') -simulation.reporters.append(PDBReporter(f'output_{protein}', 1250000)) +print("Simulating...") +simulation.reporters.append(PDBReporter(f"output_{protein}", 1250000)) simulation.reporters.append(dcdReporter) simulation.reporters.append(dataReporter) simulation.reporters.append(checkpointReporter) simulation.reporters.append(checkpointReporter10) simulation.reporters.append(checkpointReporter100) -simulation.reporters.append(StateDataReporter(sys.stdout, 1000, step=True, potentialEnergy=True, temperature=True)) +simulation.reporters.append( + StateDataReporter( + sys.stdout, 1000, 
step=True, potentialEnergy=True, temperature=True + ) +) simulation.currentStep = 0 simulation.step(steps) -mdtraj_conversion(f'Equilibration_{protein}') -MDanalysis_conversion(f'centered_old_coordinates.pdb', f'centered_old_coordinates.dcd', ligand_name='UNK') -rmsd_for_atomgroups(f'prot_lig_top.pdb', f'prot_lig_traj.dcd', selection1='backbone', selection2=['protein', 'resname UNK']) -RMSD_dist_frames(f'prot_lig_top.pdb', f'prot_lig_traj.dcd', lig='UNK') -post_md_file_movement(protein,ligand_sdf) +mdtraj_conversion(f"Equilibration_{protein}") +MDanalysis_conversion( + f"centered_old_coordinates.pdb", f"centered_old_coordinates.dcd", ligand_name="UNK" +) +rmsd_for_atomgroups( + f"prot_lig_top.pdb", + f"prot_lig_traj.dcd", + selection1="backbone", + selection2=["protein", "resname UNK"], +) +RMSD_dist_frames(f"prot_lig_top.pdb", f"prot_lig_traj.dcd", lig="UNK") +post_md_file_movement(protein, ligand_sdf) diff --git a/openmmdl/tests/analysis/markov_state_figure_generation_test.py b/openmmdl/tests/analysis/markov_state_figure_generation_test.py index 6d8da660..d44a1157 100644 --- a/openmmdl/tests/analysis/markov_state_figure_generation_test.py +++ b/openmmdl/tests/analysis/markov_state_figure_generation_test.py @@ -1,6 +1,10 @@ import networkx as nx import os -from openmmdl.openmmdl_analysis.markov_state_figure_generation import min_transition_calculation, binding_site_markov_network +from openmmdl.openmmdl_analysis.markov_state_figure_generation import ( + min_transition_calculation, + binding_site_markov_network, +) + # Create a test for min_transition_calculation def test_min_transition_calculation(): @@ -9,14 +13,13 @@ def test_min_transition_calculation(): result = min_transition_calculation(min_transition) assert result == expected_output + # Create a test for binding_site_markov_network def test_binding_site_markov_network(): # Define test data total_frames = 1000 min_transitions = [5, 10] - combined_dict = { - 'all': ['A', 'B', 'A', 'C', 'B', 'A', 'C', 'A', 
'A', 'B'] - } + combined_dict = {"all": ["A", "B", "A", "C", "B", "A", "C", "A", "A", "B"]} # Run the function binding_site_markov_network(total_frames, min_transitions, combined_dict) @@ -27,4 +30,5 @@ def test_binding_site_markov_network(): plot_path = os.path.join("Binding_Modes_Markov_States", plot_filename) assert os.path.exists(plot_path) + # Optionally, you can include more test cases to cover different scenarios and edge cases. diff --git a/openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py b/openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py index b4823082..a1a6b85f 100644 --- a/openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py +++ b/openmmdl/tests/openmmdl_analysis/binding_mode_processing_test.py @@ -14,287 +14,298 @@ @pytest.fixture def sample_dataframe_bindingmode_processing(): data = { - 'FRAME': {0: 1, 1: 2, 2: 3, 3: 2}, - 'Prot_partner': {0: 'A', 1: 'B', 2: 'C', 3: 'A'}, - 'INTERACTION': {0: 'hydrophobic', 1: 'hbond', 2: 'saltbridge', 3: 'hydrophobic'}, - 'LIGCARBONIDX': {0: 101, 1: 102, 2: 103, 3: 102}, - 'DONORIDX': {0: 201, 1: 202, 2: 203, 3: 202}, - 'ACCEPTORIDX': {0: 301, 1: 302, 2: 303, 3: 302}, - 'PROTISDON': {0: True, 1: False, 2: True, 3: False}, - 'LIG_IDX_LIST': {0: [1, 2], 1: [3, 4], 2: [5, 6], 3: [3, 4]}, - 'LIG_GROUP': {0: 'Group1', 1: 'Group2', 2: 'Group3', 3: 'Group1'}, - 'PROTISPOS': {0: True, 1: False, 2: True, 3: True}, - 'DON_IDX': {0: 0, 1: 0, 2: 0, 3: 0}, - 'DONORTYPE': {0: 0, 1: 0, 2: 0, 3: 0}, - 'ACCEPTOR_IDX': {0: 0, 1: 0, 2: 0, 3: 0}, - 'DONOR_IDX': {0: 0, 1: 0, 2: 0, 3: 0}, - 'LOCATION': {0: 0, 1: 0, 2: 0, 3: 0}, - 'METAL_IDX': {0: 0, 1: 0, 2: 0, 3: 0}, - 'METAL_TYPE': {0: 0, 1: 0, 2: 0, 3: 0}, - 'RESTYPE_LIG': {0: 0, 1: 0, 2: 0, 3: 0}, - 'TARGET_IDX': {0: 0, 1: 0, 2: 0, 3: 0}, - 'COORDINATION': {0: 0, 1: 0, 2: 0, 3: 0} + "FRAME": {0: 1, 1: 2, 2: 3, 3: 2}, + "Prot_partner": {0: "A", 1: "B", 2: "C", 3: "A"}, + "INTERACTION": { + 0: "hydrophobic", + 1: "hbond", + 2: "saltbridge", + 
3: "hydrophobic", + }, + "LIGCARBONIDX": {0: 101, 1: 102, 2: 103, 3: 102}, + "DONORIDX": {0: 201, 1: 202, 2: 203, 3: 202}, + "ACCEPTORIDX": {0: 301, 1: 302, 2: 303, 3: 302}, + "PROTISDON": {0: True, 1: False, 2: True, 3: False}, + "LIG_IDX_LIST": {0: [1, 2], 1: [3, 4], 2: [5, 6], 3: [3, 4]}, + "LIG_GROUP": {0: "Group1", 1: "Group2", 2: "Group3", 3: "Group1"}, + "PROTISPOS": {0: True, 1: False, 2: True, 3: True}, + "DON_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, + "DONORTYPE": {0: 0, 1: 0, 2: 0, 3: 0}, + "ACCEPTOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, + "DONOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, + "LOCATION": {0: 0, 1: 0, 2: 0, 3: 0}, + "METAL_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, + "METAL_TYPE": {0: 0, 1: 0, 2: 0, 3: 0}, + "RESTYPE_LIG": {0: 0, 1: 0, 2: 0, 3: 0}, + "TARGET_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, + "COORDINATION": {0: 0, 1: 0, 2: 0, 3: 0}, } # Add 'halogen' and 'hbond' data to the existing DataFrame - data['FRAME'][4] = 4 # Add a new 'FRAME' value - data['Prot_partner'][4] = 'A' # Add a new 'Prot_partner' value - data['INTERACTION'][4] = 'halogen' # Add 'halogen' interaction - data['DON_IDX'][4] = 501 # DON_IDX for 'halogen' - data['DONORTYPE'][4] = 'F' # Halogen type - data['ACCEPTOR_IDX'][4] = 0 - data['DONOR_IDX'][4] = 0 - data['LIG_IDX_LIST'][4] = 0 - data['LIG_GROUP'][4] = 0 # LIG_GROUP for 'pication - data['RESTYPE_LIG'][4] = 0 - data['TARGET_IDX'][4] = 0 - - data['FRAME'][5] = 5 # Add a new 'FRAME' value - data['Prot_partner'][5] = 'A' # Add a new 'Prot_partner' value - data['INTERACTION'][5] = 'hbond' # Add 'hbond' interaction - data['ACCEPTORIDX'][5] = 301 # ACCEPTORIDX for 'hbond' - data['DON_IDX'][5] = 0 # DON_IDX - data['DONORTYPE'][5] = 0 # DON_IDX - data['PROTISDON'][5] = True # PROTISDON is True for 'hbond' - data['ACCEPTOR_IDX'][5] = 0 - data['LIG_IDX_LIST'][5] = 0 - data['DONOR_IDX'][5] = 0 - data['LIG_GROUP'][5] = 0 # LIG_GROUP for 'pication - data['RESTYPE_LIG'][5] = 0 - data['TARGET_IDX'][5] = 0 + data["FRAME"][4] = 4 # Add a new 'FRAME' value + 
data["Prot_partner"][4] = "A" # Add a new 'Prot_partner' value + data["INTERACTION"][4] = "halogen" # Add 'halogen' interaction + data["DON_IDX"][4] = 501 # DON_IDX for 'halogen' + data["DONORTYPE"][4] = "F" # Halogen type + data["ACCEPTOR_IDX"][4] = 0 + data["DONOR_IDX"][4] = 0 + data["LIG_IDX_LIST"][4] = 0 + data["LIG_GROUP"][4] = 0 # LIG_GROUP for 'pication + data["RESTYPE_LIG"][4] = 0 + data["TARGET_IDX"][4] = 0 + + data["FRAME"][5] = 5 # Add a new 'FRAME' value + data["Prot_partner"][5] = "A" # Add a new 'Prot_partner' value + data["INTERACTION"][5] = "hbond" # Add 'hbond' interaction + data["ACCEPTORIDX"][5] = 301 # ACCEPTORIDX for 'hbond' + data["DON_IDX"][5] = 0 # DON_IDX + data["DONORTYPE"][5] = 0 # DON_IDX + data["PROTISDON"][5] = True # PROTISDON is True for 'hbond' + data["ACCEPTOR_IDX"][5] = 0 + data["LIG_IDX_LIST"][5] = 0 + data["DONOR_IDX"][5] = 0 + data["LIG_GROUP"][5] = 0 # LIG_GROUP for 'pication + data["RESTYPE_LIG"][5] = 0 + data["TARGET_IDX"][5] = 0 # Add 'waterbridge' cases where PROTISDON is both True and False - data['FRAME'][6] = 6 # Add a new 'FRAME' value - data['Prot_partner'][6] = 'A' # Add a new 'Prot_partner' value - data['INTERACTION'][6] = 'waterbridge' # Add 'waterbridge' interaction - data['ACCEPTOR_IDX'][6] = 401 # ACCEPTOR_IDX for 'waterbridge' - data['DON_IDX'][6] = 0 # DON_IDX - data['DONORTYPE'][6] = 0 # DON_IDX - data['DONOR_IDX'][6] = 0 - data['LIG_IDX_LIST'][6] = 0 - data['PROTISDON'][6] = True # PROTISDON is True for 'waterbridge' - data['LIG_GROUP'][6] = 0 # LIG_GROUP for 'pication - data['RESTYPE_LIG'][6] = 0 - data['TARGET_IDX'][6] = 0 - - data['FRAME'][7] = 7 # Add a new 'FRAME' value - data['Prot_partner'][7] = 'B' # Add a new 'Prot_partner' value - data['INTERACTION'][7] = 'waterbridge' # Add 'waterbridge' interaction - data['DONOR_IDX'][7] = 501 # DONOR_IDX for 'waterbridge' - data['DON_IDX'][7] = 0 # DON_IDX - data['DONORTYPE'][7] = 0 # DON_IDX - data['PROTISDON'][7] = False # PROTISDON is False for 'waterbridge' 
- data['ACCEPTOR_IDX'][7] = 0 - data['LIG_IDX_LIST'][7] = 0 # LIG_IDX_LIST for 'pication' - data['LIG_GROUP'][7] = 0 # LIG_GROUP for 'pication - data['RESTYPE_LIG'][7] = 0 - data['TARGET_IDX'][7] = 0 + data["FRAME"][6] = 6 # Add a new 'FRAME' value + data["Prot_partner"][6] = "A" # Add a new 'Prot_partner' value + data["INTERACTION"][6] = "waterbridge" # Add 'waterbridge' interaction + data["ACCEPTOR_IDX"][6] = 401 # ACCEPTOR_IDX for 'waterbridge' + data["DON_IDX"][6] = 0 # DON_IDX + data["DONORTYPE"][6] = 0 # DON_IDX + data["DONOR_IDX"][6] = 0 + data["LIG_IDX_LIST"][6] = 0 + data["PROTISDON"][6] = True # PROTISDON is True for 'waterbridge' + data["LIG_GROUP"][6] = 0 # LIG_GROUP for 'pication + data["RESTYPE_LIG"][6] = 0 + data["TARGET_IDX"][6] = 0 + + data["FRAME"][7] = 7 # Add a new 'FRAME' value + data["Prot_partner"][7] = "B" # Add a new 'Prot_partner' value + data["INTERACTION"][7] = "waterbridge" # Add 'waterbridge' interaction + data["DONOR_IDX"][7] = 501 # DONOR_IDX for 'waterbridge' + data["DON_IDX"][7] = 0 # DON_IDX + data["DONORTYPE"][7] = 0 # DON_IDX + data["PROTISDON"][7] = False # PROTISDON is False for 'waterbridge' + data["ACCEPTOR_IDX"][7] = 0 + data["LIG_IDX_LIST"][7] = 0 # LIG_IDX_LIST for 'pication' + data["LIG_GROUP"][7] = 0 # LIG_GROUP for 'pication + data["RESTYPE_LIG"][7] = 0 + data["TARGET_IDX"][7] = 0 # Add 'pistacking' case - data['FRAME'][8] = 8 # Add a new 'FRAME' value - data['Prot_partner'][8] = 'A' # Add a new 'Prot_partner' value - data['INTERACTION'][8] = 'pistacking' # Add 'pistacking' interaction - data['LIG_IDX_LIST'][8] = [7, 8] # LIG_IDX_LIST for 'pistacking' - data['LIG_GROUP'][8] = 0 # LIG_GROUP for 'pication - data['ACCEPTOR_IDX'][8] = 0 - data['DON_IDX'][8] = 0 # DON_IDX - data['DONOR_IDX'][8] = 0 - data['PROTISDON'][8] = False - data['DONORTYPE'][8] = 0 # DON_IDX - data['RESTYPE_LIG'][8] = 0 - data['TARGET_IDX'][8] = 0 + data["FRAME"][8] = 8 # Add a new 'FRAME' value + data["Prot_partner"][8] = "A" # Add a new 
'Prot_partner' value + data["INTERACTION"][8] = "pistacking" # Add 'pistacking' interaction + data["LIG_IDX_LIST"][8] = [7, 8] # LIG_IDX_LIST for 'pistacking' + data["LIG_GROUP"][8] = 0 # LIG_GROUP for 'pication + data["ACCEPTOR_IDX"][8] = 0 + data["DON_IDX"][8] = 0 # DON_IDX + data["DONOR_IDX"][8] = 0 + data["PROTISDON"][8] = False + data["DONORTYPE"][8] = 0 # DON_IDX + data["RESTYPE_LIG"][8] = 0 + data["TARGET_IDX"][8] = 0 # Add 'pication' case - data['FRAME'][9] = 9 # Add a new 'FRAME' value - data['Prot_partner'][9] = 'A' # Add a new 'Prot_partner' value - data['INTERACTION'][9] = 'pication' # Add 'pication' interaction - data['LIG_IDX_LIST'][9] = [9, 10] # LIG_IDX_LIST for 'pication' - data['LIG_GROUP'][9] = 'Group4' # LIG_GROUP for 'pication' - data['ACCEPTOR_IDX'][9] = 0 - data['DON_IDX'][9] = 0 # DON_IDX - data['PROTISDON'][9] = False - data['DONOR_IDX'][9] = 0 - data['DONORTYPE'][9] = 0 # DON_IDX - data['RESTYPE_LIG'][9] = 0 - data['TARGET_IDX'][9] = 0 - + data["FRAME"][9] = 9 # Add a new 'FRAME' value + data["Prot_partner"][9] = "A" # Add a new 'Prot_partner' value + data["INTERACTION"][9] = "pication" # Add 'pication' interaction + data["LIG_IDX_LIST"][9] = [9, 10] # LIG_IDX_LIST for 'pication' + data["LIG_GROUP"][9] = "Group4" # LIG_GROUP for 'pication' + data["ACCEPTOR_IDX"][9] = 0 + data["DON_IDX"][9] = 0 # DON_IDX + data["PROTISDON"][9] = False + data["DONOR_IDX"][9] = 0 + data["DONORTYPE"][9] = 0 # DON_IDX + data["RESTYPE_LIG"][9] = 0 + data["TARGET_IDX"][9] = 0 + # Add 'metal' interaction case - data['FRAME'][10] = 10 # Add a new 'FRAME' value - data['Prot_partner'][10] = 'A' # Add a new 'Prot_partner' value - data['INTERACTION'][10] = 'metal' # Add 'metal' interaction - data['METAL_IDX'][10] = 401 # METAL_IDX for 'metal' - data['METAL_TYPE'][10] = 'Fe' # Metal type - data['LOCATION'][10] = 'site1' # Location - data['ACCEPTOR_IDX'][10] = 0 - data['DONOR_IDX'][10] = 0 - data['RESTYPE_LIG'][10] = 'A' - data['TARGET_IDX'][10] = 401 - 
data['COORDINATION'][10] = "site1" - - data['FRAME'][11] = 11 # Add a new 'FRAME' value - data['Prot_partner'][11] = 'A' # Add a new 'Prot_partner' value - data['INTERACTION'][11] = 'saltbridge' # Add 'saltbridge' interaction - data['LIG_IDX_LIST'][11] = [7, 8] # Ligand index list for 'saltbridge PI' - data['LIG_GROUP'][11] = 'Group4' # Ligand group for 'saltbridge PI' - data['PROTISPOS'][11] = False # PROTISPOS is False for 'saltbridge PI' - data['RESTYPE_LIG'][11] = 0 - data['TARGET_IDX'][11] = 0 + data["FRAME"][10] = 10 # Add a new 'FRAME' value + data["Prot_partner"][10] = "A" # Add a new 'Prot_partner' value + data["INTERACTION"][10] = "metal" # Add 'metal' interaction + data["METAL_IDX"][10] = 401 # METAL_IDX for 'metal' + data["METAL_TYPE"][10] = "Fe" # Metal type + data["LOCATION"][10] = "site1" # Location + data["ACCEPTOR_IDX"][10] = 0 + data["DONOR_IDX"][10] = 0 + data["RESTYPE_LIG"][10] = "A" + data["TARGET_IDX"][10] = 401 + data["COORDINATION"][10] = "site1" + + data["FRAME"][11] = 11 # Add a new 'FRAME' value + data["Prot_partner"][11] = "A" # Add a new 'Prot_partner' value + data["INTERACTION"][11] = "saltbridge" # Add 'saltbridge' interaction + data["LIG_IDX_LIST"][11] = [7, 8] # Ligand index list for 'saltbridge PI' + data["LIG_GROUP"][11] = "Group4" # Ligand group for 'saltbridge PI' + data["PROTISPOS"][11] = False # PROTISPOS is False for 'saltbridge PI' + data["RESTYPE_LIG"][11] = 0 + data["TARGET_IDX"][11] = 0 # Add 'hydrophobic' case where 'ring_found' is False - data['FRAME'][12] = 12 # Add a new 'FRAME' value - data['Prot_partner'][12] = 'C' # Add a new 'Prot_partner' value - data['INTERACTION'][12] = 'hydrophobic' # Add 'hydrophobic' interaction - data['LIGCARBONIDX'][12] = 104 # LIGCARBONIDX for 'hydrophobic' (not in any ring) - data['RESTYPE_LIG'][12] = 0 - data['TARGET_IDX'][12] = 0 + data["FRAME"][12] = 12 # Add a new 'FRAME' value + data["Prot_partner"][12] = "C" # Add a new 'Prot_partner' value + data["INTERACTION"][12] = "hydrophobic" 
# Add 'hydrophobic' interaction + data["LIGCARBONIDX"][12] = 104 # LIGCARBONIDX for 'hydrophobic' (not in any ring) + data["RESTYPE_LIG"][12] = 0 + data["TARGET_IDX"][12] = 0 return pd.DataFrame(data) - @pytest.fixture def sample_dataframe_bindingmode_processing_with_peptides(): data = { - 'FRAME': {0: 1, 1: 2, 2: 3, 3: 2}, - 'Prot_partner': {0: '62VAL', 1: 'SER144', 2: 'GLU321', 3: 'ILE432'}, - 'INTERACTION': {0: 'hydrophobic', 1: 'hbond', 2: 'saltbridge', 3: 'hydrophobic'}, - 'LIGCARBONIDX': {0: 101, 1: 102, 2: 103, 3: 102}, - 'DONORIDX': {0: 201, 1: 202, 2: 203, 3: 202}, - 'ACCEPTORIDX': {0: 301, 1: 302, 2: 303, 3: 302}, - 'PROTISDON': {0: True, 1: False, 2: True, 3: False}, - 'LIG_IDX_LIST': {0: [1, 2], 1: [3, 4], 2: [5, 6], 3: [3, 4]}, - 'LIG_GROUP': {0: 'Group1', 1: 'Group2', 2: 'Group3', 3: 'Group1'}, - 'PROTISPOS': {0: True, 1: False, 2: True, 3: True}, - 'DON_IDX': {0: 0, 1: 0, 2: 0, 3: 0}, - 'DONORTYPE': {0: 0, 1: 0, 2: 0, 3: 0}, - 'ACCEPTOR_IDX': {0: 0, 1: 0, 2: 0, 3: 0}, - 'DONOR_IDX': {0: 0, 1: 0, 2: 0, 3: 0}, - 'LOCATION': {0: 0, 1: 0, 2: 0, 3: 0}, - 'METAL_IDX': {0: 0, 1: 0, 2: 0, 3: 0}, - 'METAL_TYPE': {0: 0, 1: 0, 2: 0, 3: 0}, - 'RESTYPE_LIG': {0: 'ILE', 1: 'TYR', 2: 'ARG', 3: 'VAL'}, - 'TARGET_IDX': {0: 0, 1: 0, 2: 0, 3: 0}, - 'COORDINATION': {0: 0, 1: 0, 2: 0, 3: 0}, - 'RESNR_LIG' : {0: '101', 1: '202', 2: '155', 3: '102'} + "FRAME": {0: 1, 1: 2, 2: 3, 3: 2}, + "Prot_partner": {0: "62VAL", 1: "SER144", 2: "GLU321", 3: "ILE432"}, + "INTERACTION": { + 0: "hydrophobic", + 1: "hbond", + 2: "saltbridge", + 3: "hydrophobic", + }, + "LIGCARBONIDX": {0: 101, 1: 102, 2: 103, 3: 102}, + "DONORIDX": {0: 201, 1: 202, 2: 203, 3: 202}, + "ACCEPTORIDX": {0: 301, 1: 302, 2: 303, 3: 302}, + "PROTISDON": {0: True, 1: False, 2: True, 3: False}, + "LIG_IDX_LIST": {0: [1, 2], 1: [3, 4], 2: [5, 6], 3: [3, 4]}, + "LIG_GROUP": {0: "Group1", 1: "Group2", 2: "Group3", 3: "Group1"}, + "PROTISPOS": {0: True, 1: False, 2: True, 3: True}, + "DON_IDX": {0: 0, 1: 0, 2: 0, 3: 
0}, + "DONORTYPE": {0: 0, 1: 0, 2: 0, 3: 0}, + "ACCEPTOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, + "DONOR_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, + "LOCATION": {0: 0, 1: 0, 2: 0, 3: 0}, + "METAL_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, + "METAL_TYPE": {0: 0, 1: 0, 2: 0, 3: 0}, + "RESTYPE_LIG": {0: "ILE", 1: "TYR", 2: "ARG", 3: "VAL"}, + "TARGET_IDX": {0: 0, 1: 0, 2: 0, 3: 0}, + "COORDINATION": {0: 0, 1: 0, 2: 0, 3: 0}, + "RESNR_LIG": {0: "101", 1: "202", 2: "155", 3: "102"}, } - + # Additional data for peptide interactions - data['FRAME'][4] = 4 - data['Prot_partner'][4] = 'LEU248' - data['INTERACTION'][4] = 'halogen' - data['DON_IDX'][4] = 501 - data['DONORTYPE'][4] = 'F' - data['ACCEPTOR_IDX'][4] = 0 - data['DONOR_IDX'][4] = 0 - data['LIG_IDX_LIST'][4] = 0 - data['LIG_GROUP'][4] = 0 - data['RESTYPE_LIG'][4] = 'ILE' - data['TARGET_IDX'][4] = 0 - data['RESNR_LIG'][4] = '501' - - data['FRAME'][5] = 5 - data['Prot_partner'][5] = 'SER300' - data['INTERACTION'][5] = 'hbond' - data['ACCEPTORIDX'][5] = 301 - data['DON_IDX'][5] = 0 - data['DONORTYPE'][5] = 0 - data['PROTISDON'][5] = True - data['ACCEPTOR_IDX'][5] = 0 - data['LIG_IDX_LIST'][5] = 0 - data['DONOR_IDX'][5] = 0 - data['LIG_GROUP'][5] = 0 - data['RESTYPE_LIG'][5] = 'HIS' - data['TARGET_IDX'][5] = 0 - data['RESNR_LIG'][5] = '301' - - data['FRAME'][6] = 6 - data['Prot_partner'][6] = 'TYR343' - data['INTERACTION'][6] = 'waterbridge' - data['ACCEPTOR_IDX'][6] = 401 - data['DON_IDX'][6] = 0 - data['DONORTYPE'][6] = 0 - data['DONOR_IDX'][6] = 0 - data['LIG_IDX_LIST'][6] = 0 - data['PROTISDON'][6] = True - data['LIG_GROUP'][6] = 0 - data['RESTYPE_LIG'][6] = 'SER' - data['TARGET_IDX'][6] = 0 - data['RESNR_LIG'][6] = '455' - - data['FRAME'][7] = 7 - data['Prot_partner'][7] = 'ILE178' - data['INTERACTION'][7] = 'waterbridge' - data['DONOR_IDX'][7] = 501 - data['DON_IDX'][7] = 0 - data['DONORTYPE'][7] = 0 - data['PROTISDON'][7] = False - data['ACCEPTOR_IDX'][7] = 0 - data['LIG_IDX_LIST'][7] = 0 - data['LIG_GROUP'][7] = 0 - data['RESTYPE_LIG'][7] 
= 'TYR' - data['TARGET_IDX'][7] = 0 - data['RESNR_LIG'][7] = '467' - - data['FRAME'][8] = 8 - data['Prot_partner'][8] = 'PHE344' - data['INTERACTION'][8] = 'pistacking' - data['LIG_IDX_LIST'][8] = [7, 8] - data['LIG_GROUP'][8] = 0 - data['ACCEPTOR_IDX'][8] = 0 - data['DON_IDX'][8] = 0 - data['DONOR_IDX'][8] = 0 - data['PROTISDON'][8] = False - data['DONORTYPE'][8] = 0 - data['RESTYPE_LIG'][8] = 'PHE' - data['TARGET_IDX'][8] = 0 - data['RESNR_LIG'][8] = '398' - - data['FRAME'][9] = 9 - data['Prot_partner'][9] = 'PHE754' - data['INTERACTION'][9] = 'pication' - data['LIG_IDX_LIST'][9] = [9, 10] - data['LIG_GROUP'][9] = 'B' - data['ACCEPTOR_IDX'][9] = 0 - data['DON_IDX'][9] = 0 - data['PROTISDON'][9] = False - data['DONOR_IDX'][9] = 0 - data['DONORTYPE'][9] = 0 - data['RESTYPE_LIG'][9] = 'ARG' - data['TARGET_IDX'][9] = 0 - data['RESNR_LIG'][9] = '245' - - data['FRAME'][10] = 10 - data['Prot_partner'][10] = 'LYS567' - data['INTERACTION'][10] = 'pication' - data['LIG_IDX_LIST'][10] = [9, 10] - data['LIG_GROUP'][10] = 'B' - data['RESTYPE_LIG'][10] = 'PHE' - data['TARGET_IDX'][10] = 501 - data['RESNR_LIG'][10] = '228' - - data['FRAME'][11] = 11 - data['Prot_partner'][11] = 'LYS567' - data['INTERACTION'][11] = 'saltbridge' - data['LIG_IDX_LIST'][11] = [7, 8] - data['LIG_GROUP'][11] = 'Group4' - data['PROTISPOS'][11] = False - data['RESTYPE_LIG'][11] = 'GLU' - data['TARGET_IDX'][11] = 0 - data['RESNR_LIG'][11] = '423' - - data['FRAME'][12] = 12 - data['Prot_partner'][12] = 'HEM144' - data['INTERACTION'][12] = 'metal' - data['METAL_IDX'][12] = 401 # METAL_IDX for 'metal' - data['METAL_TYPE'][12] = 'Fe' # Metal type - data['LOCATION'][12] = 'site1' # Location - data['ACCEPTOR_IDX'][12] = 0 - data['DON_IDX'][12] = 0 - data['RESTYPE_LIG'][12] = 'HIS' # Assuming 'A' as the RESTYPE_LIG for the metal interaction - data['TARGET_IDX'][12] = 401 - data['COORDINATION'][12] = "site1" - data['RESNR_LIG'][12] = '256' - + data["FRAME"][4] = 4 + data["Prot_partner"][4] = "LEU248" + 
data["INTERACTION"][4] = "halogen" + data["DON_IDX"][4] = 501 + data["DONORTYPE"][4] = "F" + data["ACCEPTOR_IDX"][4] = 0 + data["DONOR_IDX"][4] = 0 + data["LIG_IDX_LIST"][4] = 0 + data["LIG_GROUP"][4] = 0 + data["RESTYPE_LIG"][4] = "ILE" + data["TARGET_IDX"][4] = 0 + data["RESNR_LIG"][4] = "501" + + data["FRAME"][5] = 5 + data["Prot_partner"][5] = "SER300" + data["INTERACTION"][5] = "hbond" + data["ACCEPTORIDX"][5] = 301 + data["DON_IDX"][5] = 0 + data["DONORTYPE"][5] = 0 + data["PROTISDON"][5] = True + data["ACCEPTOR_IDX"][5] = 0 + data["LIG_IDX_LIST"][5] = 0 + data["DONOR_IDX"][5] = 0 + data["LIG_GROUP"][5] = 0 + data["RESTYPE_LIG"][5] = "HIS" + data["TARGET_IDX"][5] = 0 + data["RESNR_LIG"][5] = "301" + + data["FRAME"][6] = 6 + data["Prot_partner"][6] = "TYR343" + data["INTERACTION"][6] = "waterbridge" + data["ACCEPTOR_IDX"][6] = 401 + data["DON_IDX"][6] = 0 + data["DONORTYPE"][6] = 0 + data["DONOR_IDX"][6] = 0 + data["LIG_IDX_LIST"][6] = 0 + data["PROTISDON"][6] = True + data["LIG_GROUP"][6] = 0 + data["RESTYPE_LIG"][6] = "SER" + data["TARGET_IDX"][6] = 0 + data["RESNR_LIG"][6] = "455" + + data["FRAME"][7] = 7 + data["Prot_partner"][7] = "ILE178" + data["INTERACTION"][7] = "waterbridge" + data["DONOR_IDX"][7] = 501 + data["DON_IDX"][7] = 0 + data["DONORTYPE"][7] = 0 + data["PROTISDON"][7] = False + data["ACCEPTOR_IDX"][7] = 0 + data["LIG_IDX_LIST"][7] = 0 + data["LIG_GROUP"][7] = 0 + data["RESTYPE_LIG"][7] = "TYR" + data["TARGET_IDX"][7] = 0 + data["RESNR_LIG"][7] = "467" + + data["FRAME"][8] = 8 + data["Prot_partner"][8] = "PHE344" + data["INTERACTION"][8] = "pistacking" + data["LIG_IDX_LIST"][8] = [7, 8] + data["LIG_GROUP"][8] = 0 + data["ACCEPTOR_IDX"][8] = 0 + data["DON_IDX"][8] = 0 + data["DONOR_IDX"][8] = 0 + data["PROTISDON"][8] = False + data["DONORTYPE"][8] = 0 + data["RESTYPE_LIG"][8] = "PHE" + data["TARGET_IDX"][8] = 0 + data["RESNR_LIG"][8] = "398" + + data["FRAME"][9] = 9 + data["Prot_partner"][9] = "PHE754" + data["INTERACTION"][9] = "pication" + 
data["LIG_IDX_LIST"][9] = [9, 10] + data["LIG_GROUP"][9] = "B" + data["ACCEPTOR_IDX"][9] = 0 + data["DON_IDX"][9] = 0 + data["PROTISDON"][9] = False + data["DONOR_IDX"][9] = 0 + data["DONORTYPE"][9] = 0 + data["RESTYPE_LIG"][9] = "ARG" + data["TARGET_IDX"][9] = 0 + data["RESNR_LIG"][9] = "245" + + data["FRAME"][10] = 10 + data["Prot_partner"][10] = "LYS567" + data["INTERACTION"][10] = "pication" + data["LIG_IDX_LIST"][10] = [9, 10] + data["LIG_GROUP"][10] = "B" + data["RESTYPE_LIG"][10] = "PHE" + data["TARGET_IDX"][10] = 501 + data["RESNR_LIG"][10] = "228" + + data["FRAME"][11] = 11 + data["Prot_partner"][11] = "LYS567" + data["INTERACTION"][11] = "saltbridge" + data["LIG_IDX_LIST"][11] = [7, 8] + data["LIG_GROUP"][11] = "Group4" + data["PROTISPOS"][11] = False + data["RESTYPE_LIG"][11] = "GLU" + data["TARGET_IDX"][11] = 0 + data["RESNR_LIG"][11] = "423" + + data["FRAME"][12] = 12 + data["Prot_partner"][12] = "HEM144" + data["INTERACTION"][12] = "metal" + data["METAL_IDX"][12] = 401 # METAL_IDX for 'metal' + data["METAL_TYPE"][12] = "Fe" # Metal type + data["LOCATION"][12] = "site1" # Location + data["ACCEPTOR_IDX"][12] = 0 + data["DON_IDX"][12] = 0 + data["RESTYPE_LIG"][ + 12 + ] = "HIS" # Assuming 'A' as the RESTYPE_LIG for the metal interaction + data["TARGET_IDX"][12] = 401 + data["COORDINATION"][12] = "site1" + data["RESNR_LIG"][12] = "256" + return pd.DataFrame(data) @@ -305,68 +316,66 @@ def test_gather_interactions(sample_dataframe_bindingmode_processing): result = gather_interactions(df, ligand_rings) # Assert that the result is a dictionary - + assert isinstance(result, dict) # Check specific values in the generated dictionary for known interactions based on the updated fixture expected_result = { - 1: {0: 'A_101_hydrophobic'}, - 2: {1: 'B_202_Donor_hbond', 3: 'A_102_hydrophobic'}, - 3: {2: 'C_[5, 6]_Group3_NI_saltbridge'}, - 4: {4: 'A_501_F_halogen'}, - 5: {5: 'A_301_Acceptor_hbond'}, - 6: {6: 'A_401_Acceptor_waterbridge'}, - 7: {7: 
'B_501_Donor_waterbridge'}, - 8: {8: 'A_[7, 8]_pistacking'}, - 9: {9: 'A_[9_ 10]_Group4_pication'}, - 10: {10: 'A_401_Fe_site1_metal'}, - 11: {11: 'A_[7, 8]_Group4_PI_saltbridge'}, - 12: {12: 'C_104_hydrophobic'} -} + 1: {0: "A_101_hydrophobic"}, + 2: {1: "B_202_Donor_hbond", 3: "A_102_hydrophobic"}, + 3: {2: "C_[5, 6]_Group3_NI_saltbridge"}, + 4: {4: "A_501_F_halogen"}, + 5: {5: "A_301_Acceptor_hbond"}, + 6: {6: "A_401_Acceptor_waterbridge"}, + 7: {7: "B_501_Donor_waterbridge"}, + 8: {8: "A_[7, 8]_pistacking"}, + 9: {9: "A_[9_ 10]_Group4_pication"}, + 10: {10: "A_401_Fe_site1_metal"}, + 11: {11: "A_[7, 8]_Group4_PI_saltbridge"}, + 12: {12: "C_104_hydrophobic"}, + } # Check if the actual result matches the expected result assert result == expected_result -def test_gather_interactions_with_peptides(sample_dataframe_bindingmode_processing_with_peptides): + +def test_gather_interactions_with_peptides( + sample_dataframe_bindingmode_processing_with_peptides, +): df = sample_dataframe_bindingmode_processing_with_peptides ligand_rings = [[101], [102], [103]] # Define sample ligand rings for testing result = gather_interactions(df, ligand_rings, peptide=True) - + # Assert that the result is a dictionary assert isinstance(result, dict) - + # Check specific values in the generated dictionary for known interactions based on the updated fixture expected_result = { - 1: {0: '62VAL_101ILE_hydrophobic'}, - 2: {1: 'SER144_202TYR_Donor_hbond', 3: 'ILE432_102VAL_hydrophobic'}, - 3: {2: 'GLU321_155ARG_ARG_NI_saltbridge'}, - 4: {4: 'LEU248_501ILE_F_halogen'}, - 5: {5: 'SER300_301HIS_Acceptor_hbond'}, - 6: {6: 'TYR343_455SER_Acceptor_waterbridge'}, - 7: {7: 'ILE178_467TYR_Donor_waterbridge'}, - 8: {8: 'PHE344_398PHE_pistacking'}, - 9: {9: 'PHE754_245ARG_ARG_pication'}, - 10: {10: 'LYS567_228PHE_PHE_pication'}, - 11: {11: 'LYS567_423GLU_GLU_PI_saltbridge'}, - 12: {12: 'HIS_256HIS_Fe_site1_metal'}, + 1: {0: "62VAL_101ILE_hydrophobic"}, + 2: {1: "SER144_202TYR_Donor_hbond", 3: 
"ILE432_102VAL_hydrophobic"}, + 3: {2: "GLU321_155ARG_ARG_NI_saltbridge"}, + 4: {4: "LEU248_501ILE_F_halogen"}, + 5: {5: "SER300_301HIS_Acceptor_hbond"}, + 6: {6: "TYR343_455SER_Acceptor_waterbridge"}, + 7: {7: "ILE178_467TYR_Donor_waterbridge"}, + 8: {8: "PHE344_398PHE_pistacking"}, + 9: {9: "PHE754_245ARG_ARG_pication"}, + 10: {10: "LYS567_228PHE_PHE_pication"}, + 11: {11: "LYS567_423GLU_GLU_PI_saltbridge"}, + 12: {12: "HIS_256HIS_Fe_site1_metal"}, } - + # Check if the actual result matches the expected result assert result == expected_result @pytest.fixture def test_remove_duplicates_data(): - input_data = { - 'a': {'x': 1, 'y': 2, 'z': 1}, - 'b': {'p': 3, 'q': 3, 'r': 4} - } - expected_output = { - 'a': {'x': 1, 'y': 2}, - 'b': {'p': 3, 'r': 4} - } + input_data = {"a": {"x": 1, "y": 2, "z": 1}, "b": {"p": 3, "q": 3, "r": 4}} + expected_output = {"a": {"x": 1, "y": 2}, "b": {"p": 3, "r": 4}} return input_data, expected_output + def test_unique_data_generation(): # Test case 1: Check if the function returns an empty dictionary for an empty list result = unique_data_generation([]) @@ -384,68 +393,71 @@ def test_unique_data_generation(): expected_result = {"apple": "apple", "banana": "banana", "cherry": "cherry"} assert result == expected_result - + # Define a test case that uses the fixture def test_remove_duplicate_values(test_remove_duplicates_data): input_data, expected_output = test_remove_duplicates_data assert remove_duplicate_values(input_data) == expected_output + def test_combine_subdict_values(): # Test case 1: Empty input dictionary data = {} result = combine_subdict_values(data) - assert result == {'all': []} + assert result == {"all": []} # Test case 2: Input dictionary with sub-dictionaries data = { - 'dict1': {'a': 1, 'b': 2}, - 'dict2': {'c': 3, 'd': 4}, - 'dict3': {'e': 5, 'f': 6}, + "dict1": {"a": 1, "b": 2}, + "dict2": {"c": 3, "d": 4}, + "dict3": {"e": 5, "f": 6}, } result = combine_subdict_values(data) - assert result == {'all': [1, 2, 3, 4, 
5, 6]} + assert result == {"all": [1, 2, 3, 4, 5, 6]} # Test case 3: Input dictionary with empty sub-dictionaries data = { - 'dict1': {}, - 'dict2': {}, + "dict1": {}, + "dict2": {}, } result = combine_subdict_values(data) - assert result == {'all': []} + assert result == {"all": []} # Test case 4: Input dictionary with sub-dictionaries containing various data types data = { - 'dict1': {'a': 1, 'b': 'text', 'c': [1, 2, 3]}, - 'dict2': {'d': None, 'e': 5.5}, + "dict1": {"a": 1, "b": "text", "c": [1, 2, 3]}, + "dict2": {"d": None, "e": 5.5}, } result = combine_subdict_values(data) - assert result == {'all': [1, 'text', [1, 2, 3], None, 5.5]} + assert result == {"all": [1, "text", [1, 2, 3], None, 5.5]} + # Define a sample DataFrame for testing sample_data = { - 'A': [1, 2, 3, 4, 5], - 'B': [2, 3, 4, 5, 6], - 'C': [3, 4, 5, 6, 7], - 'D': [4, 5, 6, 7, 8], + "A": [1, 2, 3, 4, 5], + "B": [2, 3, 4, 5, 6], + "C": [3, 4, 5, 6, 7], + "D": [4, 5, 6, 7, 8], } sample_df = pd.DataFrame(sample_data) # Define the provided 'unique_columns_rings_grouped' data for testing unique_columns_rings_grouped = { - 1: {0: 'A_101_hydrophobic'}, - 2: {1: 'B_202_Donor_hbond', 3: 'A_102_hydrophobic'}, - 3: {2: 'C_[5, 6]_Group3_NI_saltbridge'}, - 4: {4: 'A_501_F_halogen'}, - 5: {5: 'A_301_Acceptor_hbond'}, - 6: {6: 'A_401_Acceptor_waterbridge'}, - 7: {7: 'B_501_Donor_waterbridge'}, - 8: {8: 'A_[7, 8]_pistacking'}, - 9: {9: 'A_[9_ 10]_Group4_pication'}, - 10: {10: 'A_401_Fe_site1_metal'}, - 11: {11: 'A_[7, 8]_Group4_PI_saltbridge'}, - 12: {12: 'C_104_hydrophobic'} + 1: {0: "A_101_hydrophobic"}, + 2: {1: "B_202_Donor_hbond", 3: "A_102_hydrophobic"}, + 3: {2: "C_[5, 6]_Group3_NI_saltbridge"}, + 4: {4: "A_501_F_halogen"}, + 5: {5: "A_301_Acceptor_hbond"}, + 6: {6: "A_401_Acceptor_waterbridge"}, + 7: {7: "B_501_Donor_waterbridge"}, + 8: {8: "A_[7, 8]_pistacking"}, + 9: {9: "A_[9_ 10]_Group4_pication"}, + 10: {10: "A_401_Fe_site1_metal"}, + 11: {11: "A_[7, 8]_Group4_PI_saltbridge"}, + 12: {12: 
"C_104_hydrophobic"}, } + def test_filtering_values_with_provided_data(): # Test case 1: Check if the function returns a list threshold = 0.2 # 20% threshold @@ -472,324 +484,1333 @@ def test_filtering_values_with_provided_data(): expected_threshold = threshold * frames occurrences = {value: 5 for value in result} # Assume all values occur 5 times assert all(count >= expected_threshold for count in occurrences.values()) - + def test_df_iteration_numbering(): # Sample DataFrame for testing data = { - 'Unnamed: 0': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], - 'RESNR': [98, 63, 162, 161, 166, 165, 125, 166, 211, 227, 223, 165, 100, 59, 98, 207, 164, 155, 228], - 'RESTYPE': ['PHE', 'ARG', 'ALA', 'PHE', 'ARG', 'ASP', 'TYR', 'ARG', 'PHE', 'LEU', 'THR', 'ASP', 'ASP', 'ARG', 'PHE', 'PHE', 'LYS', 'HEM', 'SER'], - 'RESCHAIN': ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'], - 'RESNR_LIG': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'RESTYPE_LIG': ['UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'UNK', 'HEM', 'UNK'], - 'RESCHAIN_LIG': ['X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X'], - 'DIST': [3.46, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 3.36, 3.61, 3.84, 3.62, 3.72, 3.62, 3.99, 3.65, 3.70, 5.16, 2.55, 2.34], - 'LIGCARBONIDX': [4196.0, 0.0, 4214.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4206.0, 4207.0, 4207.0, 4215.0, 4217.0, 4217.0, 4194.0, 4208.0, 0.0, 0.0, 0.0], - '162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - '166ARGA_4220,4221_Carboxylate_NI_saltbridge': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - '98PHEA_4194_hydrophobic': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - '63ARGA_4201_Acceptor_waterbridge': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 
'164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - '166ARGA_4220_Acceptor_hbond': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - '98PHEA_4225_Donor_hbond': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - '207PHEA_4213,4214,4215,4216,4217,4218_pistacking': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - '100ASPA_4005_Donor_waterbridge': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - '59ARGA_4222_Acceptor_waterbridge': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'HEM_4255_Fe_4.0_metal': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - '228SERA_4228_F_halogen': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "Unnamed: 0": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + ], + "RESNR": [ + 98, + 63, + 162, + 161, + 166, + 165, + 125, + 166, + 211, + 227, + 223, + 165, + 100, + 59, + 98, + 207, + 164, + 155, + 228, + ], + "RESTYPE": [ + "PHE", + "ARG", + "ALA", + "PHE", + "ARG", + "ASP", + "TYR", + "ARG", + "PHE", + "LEU", + "THR", + "ASP", + "ASP", + "ARG", + "PHE", + "PHE", + "LYS", + "HEM", + "SER", + ], + "RESCHAIN": [ + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + ], + "RESNR_LIG": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "RESTYPE_LIG": [ + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "UNK", + "HEM", + "UNK", + ], + "RESCHAIN_LIG": [ + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + "X", + ], + "DIST": [ + 3.46, + 0.00, + 0.00, + 0.00, + 0.00, + 0.00, + 0.00, + 3.36, + 3.61, + 3.84, + 3.62, + 3.72, + 3.62, + 3.99, + 3.65, + 3.70, + 5.16, + 2.55, + 2.34, + ], + 
"LIGCARBONIDX": [ + 4196.0, + 0.0, + 4214.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 4206.0, + 4207.0, + 4207.0, + 4215.0, + 4217.0, + 4217.0, + 4194.0, + 4208.0, + 0.0, + 0.0, + 0.0, + ], + "162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "166ARGA_4220,4221_Carboxylate_NI_saltbridge": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "98PHEA_4194_hydrophobic": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "63ARGA_4201_Acceptor_waterbridge": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "166ARGA_4220_Acceptor_hbond": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "98PHEA_4225_Donor_hbond": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "207PHEA_4213,4214,4215,4216,4217,4218_pistacking": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "100ASPA_4005_Donor_waterbridge": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "59ARGA_4222_Acceptor_waterbridge": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "HEM_4255_Fe_4.0_metal": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "228SERA_4228_F_halogen": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], } df = pd.DataFrame(data) interactions = [ - 'hbond', - 'waterbridge', - 'hydrophobic', - 'hbond', - 'hbond', - 
'hbond', - 'hbond', - 'saltbridge', - 'hydrophobic', - 'hydrophobic', - 'hydrophobic', - 'hydrophobic', - 'waterbridge', - 'waterbridge', - 'hydrophobic', - 'pistacking', - 'pication', - 'metal', - 'halogen' -] - df['INTERACTION'] = interactions - - + "hbond", + "waterbridge", + "hydrophobic", + "hbond", + "hbond", + "hbond", + "hbond", + "saltbridge", + "hydrophobic", + "hydrophobic", + "hydrophobic", + "hydrophobic", + "waterbridge", + "waterbridge", + "hydrophobic", + "pistacking", + "pication", + "metal", + "halogen", + ] + df["INTERACTION"] = interactions + # Define the values for the "PROTISDON" column - protisdon_values = [False, True, True, True, True, True, True, 0, 0, 0, 0, 0, False, True, 0, 0, 0, 0, 0] + protisdon_values = [ + False, + True, + True, + True, + True, + True, + True, + 0, + 0, + 0, + 0, + 0, + False, + True, + 0, + 0, + 0, + 0, + 0, + ] # Update the "PROTISDON" column in the DataFrame - df['PROTISDON'] = protisdon_values + df["PROTISDON"] = protisdon_values # Define the values for the "Prot_partner" column - prot_partner_values = ['98PHEA', '63ARGA', '162ALAA', '161PHEA', '166ARGA', '165ASPA', '125TYRA', '166ARGA', '211PHEA', '227LEUA', '223THRA', '165ASPA', '100ASPA', '59ARGA', '98PHEA', '207PHEA', '164LYSA', '105HEM', '228SERA'] + prot_partner_values = [ + "98PHEA", + "63ARGA", + "162ALAA", + "161PHEA", + "166ARGA", + "165ASPA", + "125TYRA", + "166ARGA", + "211PHEA", + "227LEUA", + "223THRA", + "165ASPA", + "100ASPA", + "59ARGA", + "98PHEA", + "207PHEA", + "164LYSA", + "105HEM", + "228SERA", + ] # Update the "Prot_partner" column in the DataFrame - df['Prot_partner'] = prot_partner_values + df["Prot_partner"] = prot_partner_values # Define the values for the "ACCEPTORIDX" column - acceptoridx_values = [0.0, 0.0, 4221.0, 4221.0, 4220.0, 4220.0, 4192.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + acceptoridx_values = [ + 0.0, + 0.0, + 4221.0, + 4221.0, + 4220.0, + 4220.0, + 4192.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 
0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ] # Update the "ACCEPTORIDX" column in the DataFrame - df['ACCEPTORIDX'] = acceptoridx_values + df["ACCEPTORIDX"] = acceptoridx_values # Define the values for the "DONORIDX" column - donoridx_values = [4225.0, 0.0, 2417.0, 2397.0, 2468.0, 2456.0, 1828.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + donoridx_values = [ + 4225.0, + 0.0, + 2417.0, + 2397.0, + 2468.0, + 2456.0, + 1828.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ] # Update the "DONORIDX" column in the DataFrame - df['DONORIDX'] = donoridx_values + df["DONORIDX"] = donoridx_values # Define the values for the "ACCEPTOR_IDX" column - acceptor_idx_values = [0.0, 4201.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4222.0, 0.0, 0.0, 0.0, 0.0, 0.0] + acceptor_idx_values = [ + 0.0, + 4201.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 4222.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ] # Add the "ACCEPTOR_IDX" column to the DataFrame - df['ACCEPTOR_IDX'] = acceptor_idx_values + df["ACCEPTOR_IDX"] = acceptor_idx_values # Define the values for the "DONOR_IDX" column - donor_idx_values = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4005.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4228.0] + donor_idx_values = [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 4005.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 4228.0, + ] # Add the "DONOR_IDX" column to the DataFrame - df['DONOR_IDX'] = donor_idx_values + df["DONOR_IDX"] = donor_idx_values # Define the values for the "DON_IDX" column - don_idx_values = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4005.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4228.0] + don_idx_values = [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 4005.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 4228.0, + ] # Add the "DON_IDX" column to the DataFrame - df['DON_IDX'] = 
don_idx_values - + df["DON_IDX"] = don_idx_values + # Define the values for the "LIG_IDX_LIST" column - lig_idx_list_values = [0, 0, 0, 0, 0, 0, 0, "4220,4221", 0, 0, 0, 0, 0, 0, 0, "4213,4214,4215,4216,4217,4218", "4213,4214,4215,4216,4217,4218", 0, 0] + lig_idx_list_values = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "4220,4221", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "4213,4214,4215,4216,4217,4218", + "4213,4214,4215,4216,4217,4218", + 0, + 0, + ] # Add the "LIG_IDX_LIST" column to the DataFrame - df['LIG_IDX_LIST'] = lig_idx_list_values + df["LIG_IDX_LIST"] = lig_idx_list_values # Define the values for the "LIG_GROUP" column - lig_group_values = [0, 0, 0, 0, 0, 0, 0, "Carboxylate", 0, 0, 0, 0, 0, 0, 0, "Aromatic", "Aromatic", 0, 0] + lig_group_values = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "Carboxylate", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "Aromatic", + "Aromatic", + 0, + 0, + ] # Add the "LIG_GROUP" column to the DataFrame - df['LIG_GROUP'] = lig_group_values + df["LIG_GROUP"] = lig_group_values # Define the values for the "TARGET_IDX" column target_idx_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4255, 0] # Add the "TARGET_IDX" column to the DataFrame - df['TARGET_IDX'] = target_idx_values + df["TARGET_IDX"] = target_idx_values # Define the values for the "METAL_TYPE" column metal_type_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "Fe", 0] # Add the "METAL_TYPE" column to the DataFrame - df['METAL_TYPE'] = metal_type_values + df["METAL_TYPE"] = metal_type_values # Define the values for the "COORDINATION" column coordination_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0] # Add the "COORDINATION" column to the DataFrame - df['COORDINATION'] = coordination_values + df["COORDINATION"] = coordination_values # Define the values for the "DONORTYPE" column donor_type_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "F"] # Add the "DONORTYPE" column to the DataFrame - df['DONORTYPE'] = donor_type_values + 
df["DONORTYPE"] = donor_type_values - # Updated unique_data dictionary unique_data = { - '63ARGA_4201_Acceptor_waterbridge': '63ARGA_4201_Acceptor_waterbridge', - '166ARGA_4220_Acceptor_hbond': '166ARGA_4220_Acceptor_hbond', - '166ARGA_4220,4221_Carboxylate_NI_saltbridge': '166ARGA_4220,4221_Carboxylate_NI_saltbridge', - '162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic': '162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic', - '98PHEA_4194_hydrophobic': '98PHEA_4194_hydrophobic', - '98PHEA_4225_Donor_hbond': '98PHEA_4225_Donor_hbond', - '164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication': '164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication', - '207PHEA_4213,4214,4215,4216,4217,4218_pistacking': '207PHEA_4213,4214,4215,4216,4217,4218_pistacking', - '59ARGA_4222_Acceptor_waterbridge': '59ARGA_4222_Acceptor_waterbridge', - '100ASPA_4005_Donor_waterbridge': '100ASPA_4005_Donor_waterbridge', - 'HEM_4255_Fe_4.0_metal': 'HEM_4255_Fe_4.0_metal', - '228SERA_4228_F_halogen': '228SERA_4228_F_halogen' + "63ARGA_4201_Acceptor_waterbridge": "63ARGA_4201_Acceptor_waterbridge", + "166ARGA_4220_Acceptor_hbond": "166ARGA_4220_Acceptor_hbond", + "166ARGA_4220,4221_Carboxylate_NI_saltbridge": "166ARGA_4220,4221_Carboxylate_NI_saltbridge", + "162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic": "162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic", + "98PHEA_4194_hydrophobic": "98PHEA_4194_hydrophobic", + "98PHEA_4225_Donor_hbond": "98PHEA_4225_Donor_hbond", + "164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication": "164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication", + "207PHEA_4213,4214,4215,4216,4217,4218_pistacking": "207PHEA_4213,4214,4215,4216,4217,4218_pistacking", + "59ARGA_4222_Acceptor_waterbridge": "59ARGA_4222_Acceptor_waterbridge", + "100ASPA_4005_Donor_waterbridge": "100ASPA_4005_Donor_waterbridge", + "HEM_4255_Fe_4.0_metal": "HEM_4255_Fe_4.0_metal", + "228SERA_4228_F_halogen": "228SERA_4228_F_halogen", } - # Call the function with the sample DataFrame and 
unique_data df_iteration_numbering(df, unique_data) expected_162ALAA_values = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert (df['162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic'] == expected_162ALAA_values).all() - - expected_98PHEA_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0] - assert (df['98PHEA_4194_hydrophobic'] == expected_98PHEA_values).all() - - expected_166ARGA_4220_Acceptor_hbond_values = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert (df['166ARGA_4220_Acceptor_hbond'] == expected_166ARGA_4220_Acceptor_hbond_values).all() - - expected_Carboxylate_NI_saltbridge_values = [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert (df['166ARGA_4220,4221_Carboxylate_NI_saltbridge'] == expected_Carboxylate_NI_saltbridge_values).all() - - expected_63ARGA_4201_Acceptor_waterbridge_values = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert (df['63ARGA_4201_Acceptor_waterbridge'] == expected_63ARGA_4201_Acceptor_waterbridge_values).all() - - expected_98PHEA_4225_Donor_hbond_values = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert (df['98PHEA_4225_Donor_hbond'] == expected_98PHEA_4225_Donor_hbond_values).all() - - expected_164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0] - assert (df['164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication'] == expected_164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication_values).all() - - expected_207PHEA_4213_4214_4215_4216_4217_4218_pistacking_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0] - assert (df['207PHEA_4213,4214,4215,4216,4217,4218_pistacking'] == expected_207PHEA_4213_4214_4215_4216_4217_4218_pistacking_values).all() + assert ( + df["162ALAA_4214_4215_4216_4217_4218_4213_hydrophobic"] + == expected_162ALAA_values + ).all() - expected_59ARGA_4222_Acceptor_waterbridge_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
0, 0, 0, 0, 0] - assert (df['59ARGA_4222_Acceptor_waterbridge'] == expected_59ARGA_4222_Acceptor_waterbridge_values).all() - - expected_100ASPA_4005_Donor_waterbridge_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0] - assert (df['100ASPA_4005_Donor_waterbridge'] == expected_100ASPA_4005_Donor_waterbridge_values).all() - - expected_HEM_4255_Fe_4_metal_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0] - assert (df['HEM_4255_Fe_4.0_metal'] == expected_HEM_4255_Fe_4_metal_values).all() + expected_98PHEA_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0] + assert (df["98PHEA_4194_hydrophobic"] == expected_98PHEA_values).all() + + expected_166ARGA_4220_Acceptor_hbond_values = [ + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + assert ( + df["166ARGA_4220_Acceptor_hbond"] == expected_166ARGA_4220_Acceptor_hbond_values + ).all() + + expected_Carboxylate_NI_saltbridge_values = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + assert ( + df["166ARGA_4220,4221_Carboxylate_NI_saltbridge"] + == expected_Carboxylate_NI_saltbridge_values + ).all() + + expected_63ARGA_4201_Acceptor_waterbridge_values = [ + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + assert ( + df["63ARGA_4201_Acceptor_waterbridge"] + == expected_63ARGA_4201_Acceptor_waterbridge_values + ).all() + + expected_98PHEA_4225_Donor_hbond_values = [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + assert ( + df["98PHEA_4225_Donor_hbond"] == expected_98PHEA_4225_Donor_hbond_values + ).all() + + expected_164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication_values = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + ] + assert ( + df["164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication"] + == 
expected_164LYSA_4213_4214_4215_4216_4217_4218_Aromatic_pication_values + ).all() + + expected_207PHEA_4213_4214_4215_4216_4217_4218_pistacking_values = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + ] + assert ( + df["207PHEA_4213,4214,4215,4216,4217,4218_pistacking"] + == expected_207PHEA_4213_4214_4215_4216_4217_4218_pistacking_values + ).all() + + expected_59ARGA_4222_Acceptor_waterbridge_values = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + ] + assert ( + df["59ARGA_4222_Acceptor_waterbridge"] + == expected_59ARGA_4222_Acceptor_waterbridge_values + ).all() + + expected_100ASPA_4005_Donor_waterbridge_values = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + ] + assert ( + df["100ASPA_4005_Donor_waterbridge"] + == expected_100ASPA_4005_Donor_waterbridge_values + ).all() + + expected_HEM_4255_Fe_4_metal_values = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + ] + assert (df["HEM_4255_Fe_4.0_metal"] == expected_HEM_4255_Fe_4_metal_values).all() + + expected_228SERA_4228_F_halogen_values = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + ] + assert ( + df["228SERA_4228_F_halogen"] == expected_228SERA_4228_F_halogen_values + ).all() - expected_228SERA_4228_F_halogen_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] - assert (df['228SERA_4228_F_halogen'] == expected_228SERA_4228_F_halogen_values).all() @pytest.fixture def sample_dataframe_it_peptides(): # Create a sample DataFrame for testing data = { - 'Unnamed: 0': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - 'Prot_partner': ['62VAL', 'SER144', 'GLU321', 'ILE432', 'LEU248', 'SER300', 'TYR343', 'ILE178', 'PHE344', 'PHE754', 'LYS567', 'LYS567', 'HIS'], - 'LIGCARBONIDX': [101, 202, 155, 102, 501, 301, 467, 467, 398, 245, 228, 423, 256], - 
'INTERACTION': ['hydrophobic', 'hbond', 'saltbridge', 'hydrophobic', 'halogen', 'hbond', 'waterbridge', 'waterbridge', 'pistacking', 'pication', 'pication', 'saltbridge', 'metal'], - 'PROTISDON': [None, False, None, None, None, True, True, False, None, None, None, False, None], - 'ACCEPTORIDX': [None, 202, None, None, None, 301, None, None, None, None, None, None, None], - 'RESNR_LIG': [101, 202, 155, 102, 501, 301, 455, 467, 398, 245, 228, 423, 256], - 'DONORIDX': [None, None, None, None, None, None, None, None, None, None, None, None, None], - 'DONORTYPE': [None, None, None, None, 'F', None, None, None, None, None, None, None, None], - 'LIG_IDX_LIST': [None, None, None, None, None, None, None, None, None, None, None, None, None], - 'RESTYPE_LIG': ['VAL', 'TYR', 'ARG', 'VAL', 'ILE', 'HIS', 'SER', 'TYR', 'PHE', 'ARG', 'PHE', 'GLU', 'HIS'], - 'TARGET_IDX': [None, None, None, None, None, None, None, None, None, None, None, None, None], - 'METAL_TYPE': [None, None, None, None, None, None, None, None, None, None, None, None, 'Fe'], - '62VAL_101ILE_hydrophobic': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'SER144_202TYR_Donor_hbond': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'GLU321_155ARG_ARG_NI_saltbridge': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'ILE432_102VAL_hydrophobic': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'LEU248_501ILE_F_halogen': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'SER300_301HIS_Acceptor_hbond': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'TYR343_455SER_Acceptor_waterbridge': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'ILE178_467TYR_Donor_waterbridge': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'PHE344_398PHE_pistacking': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'PHE754_245ARG_ARG_pication': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'LYS567_228PHE_PHE_pication': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'LYS567_423GLU_GLU_PI_saltbridge': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'HIS_256HIS_Fe_site1_metal': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0] + "Unnamed: 0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], + "Prot_partner": [ + "62VAL", + "SER144", + "GLU321", + "ILE432", + "LEU248", + "SER300", + "TYR343", + "ILE178", + "PHE344", + "PHE754", + "LYS567", + "LYS567", + "HIS", + ], + "LIGCARBONIDX": [ + 101, + 202, + 155, + 102, + 501, + 301, + 467, + 467, + 398, + 245, + 228, + 423, + 256, + ], + "INTERACTION": [ + "hydrophobic", + "hbond", + "saltbridge", + "hydrophobic", + "halogen", + "hbond", + "waterbridge", + "waterbridge", + "pistacking", + "pication", + "pication", + "saltbridge", + "metal", + ], + "PROTISDON": [ + None, + False, + None, + None, + None, + True, + True, + False, + None, + None, + None, + False, + None, + ], + "ACCEPTORIDX": [ + None, + 202, + None, + None, + None, + 301, + None, + None, + None, + None, + None, + None, + None, + ], + "RESNR_LIG": [101, 202, 155, 102, 501, 301, 455, 467, 398, 245, 228, 423, 256], + "DONORIDX": [ + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + ], + "DONORTYPE": [ + None, + None, + None, + None, + "F", + None, + None, + None, + None, + None, + None, + None, + None, + ], + "LIG_IDX_LIST": [ + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + ], + "RESTYPE_LIG": [ + "VAL", + "TYR", + "ARG", + "VAL", + "ILE", + "HIS", + "SER", + "TYR", + "PHE", + "ARG", + "PHE", + "GLU", + "HIS", + ], + "TARGET_IDX": [ + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + ], + "METAL_TYPE": [ + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + "Fe", + ], + "62VAL_101ILE_hydrophobic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "SER144_202TYR_Donor_hbond": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "GLU321_155ARG_ARG_NI_saltbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "ILE432_102VAL_hydrophobic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + 
"LEU248_501ILE_F_halogen": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "SER300_301HIS_Acceptor_hbond": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "TYR343_455SER_Acceptor_waterbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "ILE178_467TYR_Donor_waterbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "PHE344_398PHE_pistacking": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "PHE754_245ARG_ARG_pication": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "LYS567_228PHE_PHE_pication": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "LYS567_423GLU_GLU_PI_saltbridge": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "HIS_256HIS_Fe_site1_metal": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], } - + df = pd.DataFrame(data) return df + def test_df_iteration_numbering_with_peptide(sample_dataframe_it_peptides): unique_data = { - 0: '62VAL_101ILE_hydrophobic', - 1: 'SER144_202TYR_Donor_hbond', - 2: 'GLU321_155ARG_ARG_NI_saltbridge', - 3: 'ILE432_102VAL_hydrophobic', - 4: 'LEU248_501ILE_F_halogen', - 5: 'SER300_301HIS_Acceptor_hbond', - 6: 'TYR343_455SER_Acceptor_waterbridge', - 7: 'ILE178_467TYR_Donor_waterbridge', - 8: 'PHE344_398PHE_pistacking', - 9: 'PHE754_245ARG_ARG_pication', - 10: 'LYS567_228PHE_PHE_pication', - 11: 'LYS567_423GLU_GLU_PI_saltbridge', - 12: 'HIS_256HIS_Fe_site1_metal', + 0: "62VAL_101ILE_hydrophobic", + 1: "SER144_202TYR_Donor_hbond", + 2: "GLU321_155ARG_ARG_NI_saltbridge", + 3: "ILE432_102VAL_hydrophobic", + 4: "LEU248_501ILE_F_halogen", + 5: "SER300_301HIS_Acceptor_hbond", + 6: "TYR343_455SER_Acceptor_waterbridge", + 7: "ILE178_467TYR_Donor_waterbridge", + 8: "PHE344_398PHE_pistacking", + 9: "PHE754_245ARG_ARG_pication", + 10: "LYS567_228PHE_PHE_pication", + 11: "LYS567_423GLU_GLU_PI_saltbridge", + 12: "HIS_256HIS_Fe_site1_metal", } df_iteration_numbering(sample_dataframe_it_peptides, unique_data, peptide=True) # Assertions similar to the provided ones expected_101ILE_hydrophobic_values = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert 
(sample_dataframe_it_peptides['62VAL_101ILE_hydrophobic'] == expected_101ILE_hydrophobic_values).all() + assert ( + sample_dataframe_it_peptides["62VAL_101ILE_hydrophobic"] + == expected_101ILE_hydrophobic_values + ).all() expected_202TYR_Donor_hbond_values = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert (sample_dataframe_it_peptides['SER144_202TYR_Donor_hbond'] == expected_202TYR_Donor_hbond_values).all() + assert ( + sample_dataframe_it_peptides["SER144_202TYR_Donor_hbond"] + == expected_202TYR_Donor_hbond_values + ).all() expected_155ARG_ARG_NI_saltbridge_values = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert (sample_dataframe_it_peptides['GLU321_155ARG_ARG_NI_saltbridge'] == expected_155ARG_ARG_NI_saltbridge_values).all() + assert ( + sample_dataframe_it_peptides["GLU321_155ARG_ARG_NI_saltbridge"] + == expected_155ARG_ARG_NI_saltbridge_values + ).all() expected_102VAL_hydrophobic_values = [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0] - assert (sample_dataframe_it_peptides['ILE432_102VAL_hydrophobic'] == expected_102VAL_hydrophobic_values).all() + assert ( + sample_dataframe_it_peptides["ILE432_102VAL_hydrophobic"] + == expected_102VAL_hydrophobic_values + ).all() expected_501ILE_halogen_values = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0] - assert (sample_dataframe_it_peptides['LEU248_501ILE_F_halogen'] == expected_501ILE_halogen_values).all() + assert ( + sample_dataframe_it_peptides["LEU248_501ILE_F_halogen"] + == expected_501ILE_halogen_values + ).all() expected_301HIS_Acceptor_hbond_values = [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0] - assert (sample_dataframe_it_peptides['SER300_301HIS_Acceptor_hbond'] == expected_301HIS_Acceptor_hbond_values).all() - - expected_455SER_Acceptor_waterbridge_values = [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0] - assert (sample_dataframe_it_peptides['TYR343_455SER_Acceptor_waterbridge'] == expected_455SER_Acceptor_waterbridge_values).all() + assert ( + sample_dataframe_it_peptides["SER300_301HIS_Acceptor_hbond"] + == 
expected_301HIS_Acceptor_hbond_values + ).all() + + expected_455SER_Acceptor_waterbridge_values = [ + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + ] + assert ( + sample_dataframe_it_peptides["TYR343_455SER_Acceptor_waterbridge"] + == expected_455SER_Acceptor_waterbridge_values + ).all() expected_467TYR_Donor_waterbridge_values = [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] - assert (sample_dataframe_it_peptides['ILE178_467TYR_Donor_waterbridge'] == expected_467TYR_Donor_waterbridge_values).all() + assert ( + sample_dataframe_it_peptides["ILE178_467TYR_Donor_waterbridge"] + == expected_467TYR_Donor_waterbridge_values + ).all() expected_398PHE_pistacking_values = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0] - assert (sample_dataframe_it_peptides['PHE344_398PHE_pistacking'] == expected_398PHE_pistacking_values).all() + assert ( + sample_dataframe_it_peptides["PHE344_398PHE_pistacking"] + == expected_398PHE_pistacking_values + ).all() expected_245ARG_ARG_pication_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0] - assert (sample_dataframe_it_peptides['PHE754_245ARG_ARG_pication'] == expected_245ARG_ARG_pication_values).all() + assert ( + sample_dataframe_it_peptides["PHE754_245ARG_ARG_pication"] + == expected_245ARG_ARG_pication_values + ).all() expected_228PHE_PHE_pication_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0] - assert (sample_dataframe_it_peptides['LYS567_228PHE_PHE_pication'] == expected_228PHE_PHE_pication_values).all() + assert ( + sample_dataframe_it_peptides["LYS567_228PHE_PHE_pication"] + == expected_228PHE_PHE_pication_values + ).all() expected_423GLU_GLU_PI_saltbridge_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0] - assert (sample_dataframe_it_peptides['LYS567_423GLU_GLU_PI_saltbridge'] == expected_423GLU_GLU_PI_saltbridge_values).all() + assert ( + sample_dataframe_it_peptides["LYS567_423GLU_GLU_PI_saltbridge"] + == expected_423GLU_GLU_PI_saltbridge_values + ).all() expected_256HIS_Fe_site1_metal_values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 1] - assert (sample_dataframe_it_peptides['HIS_256HIS_Fe_site1_metal'] == expected_256HIS_Fe_site1_metal_values).all() + assert ( + sample_dataframe_it_peptides["HIS_256HIS_Fe_site1_metal"] + == expected_256HIS_Fe_site1_metal_values + ).all() + @pytest.fixture def sample_data(): # Create sample data for testing - df = pd.DataFrame({ - 'FRAME': [1, 2, 3], - 'Column1': [10, 20, 30], - 'Column2': [40, 50, 60] - }) + df = pd.DataFrame( + {"FRAME": [1, 2, 3], "Column1": [10, 20, 30], "Column2": [40, 50, 60]} + ) - new_df = pd.DataFrame({ - 'FRAME': [1, 2, 3], - 'Column1': [100, 200, 300], - 'Column2': [400, 500, 600] - }) + new_df = pd.DataFrame( + {"FRAME": [1, 2, 3], "Column1": [100, 200, 300], "Column2": [400, 500, 600]} + ) - unique_data = {'Column1': 'Column1', 'Column2': 'Column2'} + unique_data = {"Column1": "Column1", "Column2": "Column2"} return df, new_df, unique_data + def test_update_values(sample_data): # Arrange df, new_df, unique_data = sample_data # Set 'FRAME' as the index for new_df - new_df = new_df.set_index('FRAME') + new_df = new_df.set_index("FRAME") # Act update_values(df, new_df, unique_data) # Assert - expected_df = pd.DataFrame({ - 'FRAME': [1, 2, 3], - 'Column1': [100, 200, 300], - 'Column2': [400, 500, 600] - }) + expected_df = pd.DataFrame( + {"FRAME": [1, 2, 3], "Column1": [100, 200, 300], "Column2": [400, 500, 600]} + ) # Check if the specific values are updated - assert df[['Column1', 'Column2']].equals(expected_df[['Column1', 'Column2']]) + assert df[["Column1", "Column2"]].equals(expected_df[["Column1", "Column2"]]) diff --git a/openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py b/openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py index 4be61a59..07918a70 100644 --- a/openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py +++ b/openmmdl/tests/openmmdl_analysis/openmmdlanalysis_test.py @@ -5,15 +5,60 @@ from pathlib import Path -from openmmdl.openmmdl_analysis.preprocessing import process_pdb_file, 
convert_pdb_to_sdf, increase_ring_indices, convert_ligand_to_smiles -from openmmdl.openmmdl_analysis.rmsd_calculation import rmsd_for_atomgroups, RMSD_dist_frames -from openmmdl.openmmdl_analysis.interaction_gathering import characterize_complex, retrieve_plip_interactions, create_df_from_binding_site, process_frame, process_trajectory, fill_missing_frames -from openmmdl.openmmdl_analysis.binding_mode_processing import gather_interactions, remove_duplicate_values, combine_subdict_values, filtering_values, unique_data_generation, df_iteration_numbering, update_values -from openmmdl.openmmdl_analysis.markov_state_figure_generation import min_transition_calculation, binding_site_markov_network -from openmmdl.openmmdl_analysis.rdkit_figure_generation import split_interaction_data, highlight_numbers, generate_interaction_dict, update_dict, create_and_merge_images, arranged_figure_generation -from openmmdl.openmmdl_analysis.barcode_generation import barcodegeneration,plot_barcodes,plot_waterbridge_piechart -from openmmdl.openmmdl_analysis.visualization_functions import interacting_water_ids, save_interacting_waters_trajectory, cloud_json_generation -from openmmdl.openmmdl_analysis.pml_writer import generate_md_pharmacophore_cloudcenters, generate_bindingmode_pharmacophore, generate_pharmacophore_centers_all_points, generate_point_cloud_pml +from openmmdl.openmmdl_analysis.preprocessing import ( + process_pdb_file, + increase_ring_indices, + convert_ligand_to_smiles, +) +from openmmdl.openmmdl_analysis.rmsd_calculation import ( + rmsd_for_atomgroups, + RMSD_dist_frames, +) +from openmmdl.openmmdl_analysis.interaction_gathering import ( + characterize_complex, + retrieve_plip_interactions, + create_df_from_binding_site, + process_frame, + process_trajectory, + fill_missing_frames, +) +from openmmdl.openmmdl_analysis.binding_mode_processing import ( + gather_interactions, + remove_duplicate_values, + combine_subdict_values, + filtering_values, + unique_data_generation, + 
df_iteration_numbering, + update_values, +) +from openmmdl.openmmdl_analysis.markov_state_figure_generation import ( + min_transition_calculation, + binding_site_markov_network, +) +from openmmdl.openmmdl_analysis.rdkit_figure_generation import ( + split_interaction_data, + highlight_numbers, + generate_interaction_dict, + update_dict, + create_and_merge_images, + arranged_figure_generation, +) +from openmmdl.openmmdl_analysis.barcode_generation import ( + barcodegeneration, + plot_barcodes, + plot_waterbridge_piechart, +) +from openmmdl.openmmdl_analysis.visualization_functions import ( + interacting_water_ids, + save_interacting_waters_trajectory, + cloud_json_generation, +) +from openmmdl.openmmdl_analysis.pml_writer import ( + generate_md_pharmacophore_cloudcenters, + generate_bindingmode_pharmacophore, + generate_pharmacophore_centers_all_points, + generate_point_cloud_pml, +) # Print current working directory print("Current working directory:", os.getcwd()) @@ -24,13 +69,14 @@ test_data_directory = Path("openmmdl/tests/data/in") + @pytest.fixture(scope="session") def test_data_dir(tmp_path_factory): data_dir = tmp_path_factory.mktemp("test_data") return data_dir -#def test_script_execution(test_data_dir): +# def test_script_execution(test_data_dir): # # Define the root directory # root_dir = "/home/runner/work/OpenMMDL/OpenMMDL" # @@ -46,11 +92,11 @@ def test_data_dir(tmp_path_factory): # trajectory_file = os.path.join(root_dir, "openmmdl/tests/data/in/all_50.dcd") # ligand_sdf_file = os.path.join(input_data_path, "lig.sdf") # ligand_name = "UNK" - + # cmd = f" openmmdl_analysis -t {topology_file} -d {trajectory_file} -l {ligand_sdf_file} -n {ligand_name} -b 10 -c 2" - + # result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=test_data_dir) - + # assert result.returncode == 0, f"Script execution failed with error:\n{result.stderr.decode()}" # # Check that expected output files are generated @@ -67,7 +113,7 @@ def 
test_data_dir(tmp_path_factory): # assert os.path.exists(hydro_file_path), "The 'hydrophobic_barcodes.png' file is missing inside 'Barcodes'." - # Check for the presence of "Binding_Modes_Markov_States" folder +# Check for the presence of "Binding_Modes_Markov_States" folder # markov_states_folder = os.path.join(test_data_dir, "Binding_Modes_Markov_States") # assert os.path.exists(markov_states_folder), "The 'Binding_Modes_Markov_States' folder is missing." diff --git a/openmmdl/tests/openmmdl_analysis/pml_writer_test.py b/openmmdl/tests/openmmdl_analysis/pml_writer_test.py index 01fc0b01..f68b0ce8 100644 --- a/openmmdl/tests/openmmdl_analysis/pml_writer_test.py +++ b/openmmdl/tests/openmmdl_analysis/pml_writer_test.py @@ -8,29 +8,44 @@ import pytest from openmmdl.openmmdl_analysis.pml_writer import * -#pml_writer tests + +# pml_writer tests @pytest.fixture def sample_dataframe_generate_pharmacophore_centers(): data = { - 'Hydrophobic': [1, 1, 0, 1, 0], - 'Ionic': [0, 1, 0, 0, 1], - 'LIGCOO': ["(1.0, 2.0, 3.0)", "(2.0, 3.0, 4.0)", "(3.0, 4.0, 5.0)", "(4.0, 5.0, 6.0)", "(5.0, 6.0, 7.0)"] + "Hydrophobic": [1, 1, 0, 1, 0], + "Ionic": [0, 1, 0, 0, 1], + "LIGCOO": [ + "(1.0, 2.0, 3.0)", + "(2.0, 3.0, 4.0)", + "(3.0, 4.0, 5.0)", + "(4.0, 5.0, 6.0)", + "(5.0, 6.0, 7.0)", + ], } df = pd.DataFrame(data) return df + @pytest.fixture def sample_interactions_generate_pharmacophore_centers(): - return ['Hydrophobic', 'Ionic'] + return ["Hydrophobic", "Ionic"] + + +def test_generate_pharmacophore_centers( + sample_dataframe_generate_pharmacophore_centers, + sample_interactions_generate_pharmacophore_centers, +): + result = generate_pharmacophore_centers( + sample_dataframe_generate_pharmacophore_centers, + sample_interactions_generate_pharmacophore_centers, + ) -def test_generate_pharmacophore_centers(sample_dataframe_generate_pharmacophore_centers, sample_interactions_generate_pharmacophore_centers): - result = 
generate_pharmacophore_centers(sample_dataframe_generate_pharmacophore_centers, sample_interactions_generate_pharmacophore_centers) - expected_pharmacophore = { - 'Hydrophobic': [2.333, 3.333, 4.333], - 'Ionic': [3.5, 4.5, 5.5] + "Hydrophobic": [2.333, 3.333, 4.333], + "Ionic": [3.5, 4.5, 5.5], } - + assert result == expected_pharmacophore @@ -38,57 +53,72 @@ def test_generate_pharmacophore_centers(sample_dataframe_generate_pharmacophore_ def sample_dataframe_generate_pharmacophore_vectors(): # Create a sample dataframe for testing data = { - 'HBDonors': [1, 0, 1, 0, 1], - 'HBAcceptors': [0, 1, 0, 1, 0], - 'LIGCOO': [ + "HBDonors": [1, 0, 1, 0, 1], + "HBAcceptors": [0, 1, 0, 1, 0], + "LIGCOO": [ "(1.0, 2.0, 3.0)", "(2.0, 3.0, 4.0)", "(3.0, 4.0, 5.0)", "(4.0, 5.0, 6.0)", - "(5.0, 6.0, 7.0)" + "(5.0, 6.0, 7.0)", ], - 'PROTCOO': [ + "PROTCOO": [ "(0.5, 1.5, 2.5)", "(1.5, 2.5, 3.5)", "(2.5, 3.5, 4.5)", "(3.5, 4.5, 5.5)", - "(4.5, 5.5, 6.5)" - ] + "(4.5, 5.5, 6.5)", + ], } df = pd.DataFrame(data) return df + @pytest.fixture def sample_interactions_generate_pharmacophore_vectors(): - return ['HBDonors', 'HBAcceptors'] + return ["HBDonors", "HBAcceptors"] + + +def test_generate_pharmacophore_vectors( + sample_dataframe_generate_pharmacophore_vectors, + sample_interactions_generate_pharmacophore_vectors, +): + result = generate_pharmacophore_vectors( + sample_dataframe_generate_pharmacophore_vectors, + sample_interactions_generate_pharmacophore_vectors, + ) -def test_generate_pharmacophore_vectors(sample_dataframe_generate_pharmacophore_vectors, sample_interactions_generate_pharmacophore_vectors): - result = generate_pharmacophore_vectors(sample_dataframe_generate_pharmacophore_vectors, sample_interactions_generate_pharmacophore_vectors) - expected_pharmacophore = { - 'HBDonors': [ - [3.0, 4.0, 5.0], - [2.5, 3.5, 4.5] - ], - 'HBAcceptors': [ - [3.0, 4.0, 5.0], - [2.5, 3.5, 4.5] - ] + "HBDonors": [[3.0, 4.0, 5.0], [2.5, 3.5, 4.5]], + "HBAcceptors": [[3.0, 4.0, 5.0], [2.5, 
3.5, 4.5]], } assert result == expected_pharmacophore + def test_generate_md_pharmacophore_cloudcenters(tmp_path): # Sample data for the DataFrame data = { - 'Acceptor_hbond_1': [1, 0, 1, 0, 1], - 'Donor_hbond_1': [0, 1, 0, 1, 0], - 'pistacking_1': [1, 0, 0, 1, 1], - 'hydrophobic_1': [0, 1, 0, 1, 0], - 'PI_saltbridge_1': [1, 0, 1, 0, 1], - 'NI_saltbridge_1': [0, 1, 0, 1, 0], - 'LIGCOO': ['(1.0, 2.0, 3.0)', '(2.0, 3.0, 4.0)', '(3.0, 4.0, 5.0)', '(4.0, 5.0, 6.0)', '(5.0, 6.0, 7.0)'], - 'PROTCOO': ['(7.0, 6.0, 5.0)', '(6.0, 5.0, 4.0)', '(5.0, 4.0, 3.0)', '(4.0, 3.0, 2.0)', '(3.0, 2.0, 1.0)'], + "Acceptor_hbond_1": [1, 0, 1, 0, 1], + "Donor_hbond_1": [0, 1, 0, 1, 0], + "pistacking_1": [1, 0, 0, 1, 1], + "hydrophobic_1": [0, 1, 0, 1, 0], + "PI_saltbridge_1": [1, 0, 1, 0, 1], + "NI_saltbridge_1": [0, 1, 0, 1, 0], + "LIGCOO": [ + "(1.0, 2.0, 3.0)", + "(2.0, 3.0, 4.0)", + "(3.0, 4.0, 5.0)", + "(4.0, 5.0, 6.0)", + "(5.0, 6.0, 7.0)", + ], + "PROTCOO": [ + "(7.0, 6.0, 5.0)", + "(6.0, 5.0, 4.0)", + "(5.0, 4.0, 3.0)", + "(4.0, 3.0, 2.0)", + "(3.0, 2.0, 1.0)", + ], } df = pd.DataFrame(data) @@ -97,7 +127,9 @@ def test_generate_md_pharmacophore_cloudcenters(tmp_path): output_filename = tmp_path / "test_output.pml" # Call the function - generate_md_pharmacophore_cloudcenters(df, 'core_compound', output_filename, 'system_name', id_num=0) + generate_md_pharmacophore_cloudcenters( + df, "core_compound", output_filename, "system_name", id_num=0 + ) # Check if the output file is created assert os.path.isfile(output_filename), f"File {output_filename} not found." 
@@ -112,41 +144,54 @@ def test_generate_md_pharmacophore_cloudcenters(tmp_path): def test_generate_pharmacophore_centers_all_points(): # Sample data for the DataFrame data = { - 'interaction1': [1, 0, 1, 0, 1], - 'interaction2': [0, 1, 0, 1, 0], - 'LIGCOO': ['(1.0, 2.0, 3.0)', '(2.0, 3.0, 4.0)', '(3.0, 4.0, 5.0)', '(4.0, 5.0, 6.0)', '(5.0, 6.0, 7.0)'], + "interaction1": [1, 0, 1, 0, 1], + "interaction2": [0, 1, 0, 1, 0], + "LIGCOO": [ + "(1.0, 2.0, 3.0)", + "(2.0, 3.0, 4.0)", + "(3.0, 4.0, 5.0)", + "(4.0, 5.0, 6.0)", + "(5.0, 6.0, 7.0)", + ], } df = pd.DataFrame(data) # Sample interactions - interactions = ['interaction1', 'interaction2'] + interactions = ["interaction1", "interaction2"] # Call the function pharmacophore = generate_pharmacophore_centers_all_points(df, interactions) # Check if the generated pharmacophore has the expected structure assert isinstance(pharmacophore, dict), "Pharmacophore should be a dictionary." - + for interaction in interactions: - assert interaction in pharmacophore, f"{interaction} not found in the generated pharmacophore." + assert ( + interaction in pharmacophore + ), f"{interaction} not found in the generated pharmacophore." points = pharmacophore[interaction] - assert isinstance(points, list), f"Pharmacophore points for {interaction} should be a list." - + assert isinstance( + points, list + ), f"Pharmacophore points for {interaction} should be a list." + # Check if the points have the expected structure for point in points: - assert isinstance(point, list) and len(point) == 3, "Each point should be a list of three coordinates." + assert ( + isinstance(point, list) and len(point) == 3 + ), "Each point should be a list of three coordinates." 
+ def test_generate_point_cloud_pml(tmp_path): # Sample data for the cloud_dict cloud_dict = { - 'feature1': { - 'interaction1': [(1.0, 2.0, 3.0), (1.5, 2.5, 3.5), (2.0, 3.0, 4.0)], - 'interaction2': [(2.0, 3.0, 4.0), (2.5, 3.5, 4.5), (3.0, 4.0, 5.0)], + "feature1": { + "interaction1": [(1.0, 2.0, 3.0), (1.5, 2.5, 3.5), (2.0, 3.0, 4.0)], + "interaction2": [(2.0, 3.0, 4.0), (2.5, 3.5, 4.5), (3.0, 4.0, 5.0)], }, - 'feature2': { - 'interaction3': [(3.0, 4.0, 5.0), (3.5, 4.5, 5.5), (4.0, 5.0, 6.0)], + "feature2": { + "interaction3": [(3.0, 4.0, 5.0), (3.5, 4.5, 5.5), (4.0, 5.0, 6.0)], }, } @@ -165,28 +210,30 @@ def test_generate_point_cloud_pml(tmp_path): ET.parse(outname_pml) except ET.ParseError: pytest.fail(f"Invalid XML in {outname_pml}") - + def test_generate_bindingmode_pharmacophore(tmp_path): # Prepare inputs dict_bindingmode = { - "Acceptor_hbond": { - "PROTCOO": [[1, 2, 3]], - "LIGCOO": [[4, 5, 6]] - } + "Acceptor_hbond": {"PROTCOO": [[1, 2, 3]], "LIGCOO": [[4, 5, 6]]} } core_compound = "ligand" sysname = "system" id_num = 0 # Create a symbolic link in the temporary directory - os.symlink(os.path.abspath("./Binding_Modes_Markov_States"), f"{tmp_path}/Binding_Modes_Markov_States") + os.symlink( + os.path.abspath("./Binding_Modes_Markov_States"), + f"{tmp_path}/Binding_Modes_Markov_States", + ) # Prepare the output filename outname = "test_output" # Call the function - generate_bindingmode_pharmacophore(dict_bindingmode, core_compound, sysname, outname, id_num) + generate_bindingmode_pharmacophore( + dict_bindingmode, core_compound, sysname, outname, id_num + ) # Prepare the full output path outname_pml = f"{tmp_path}/Binding_Modes_Markov_States/{outname}.pml" @@ -198,4 +245,4 @@ def test_generate_bindingmode_pharmacophore(tmp_path): try: ET.parse(outname_pml) except ET.ParseError: - pytest.fail(f"Invalid XML in {outname_pml}") \ No newline at end of file + pytest.fail(f"Invalid XML in {outname_pml}") diff --git 
a/openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py b/openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py index 7635ed83..889bf850 100644 --- a/openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py +++ b/openmmdl/tests/openmmdl_analysis/test_interaction_gathering.py @@ -25,6 +25,7 @@ lig_name = "UNK" peptide = "X" + # Test the function def test_characterize_complex(): # Call the function @@ -33,6 +34,7 @@ def test_characterize_complex(): # Check if the function returns a PLInteraction object assert isinstance(interaction_set, PLInteraction) + def test_retrieve_plip_interactions(): # Call the function interactions = retrieve_plip_interactions(topology_file, lig_name) @@ -40,6 +42,7 @@ def test_retrieve_plip_interactions(): # Check if the function returns a dictionary assert isinstance(interactions, dict) + def test_retrieve_plip_interactions_peptide(): # Call the function interactions = retrieve_plip_interactions_peptide(topology_file, peptide) @@ -47,15 +50,19 @@ def test_retrieve_plip_interactions_peptide(): # Check if the function returns a dictionary assert isinstance(interactions, dict) + # Define test data sample_interactions = { "hydrophobic": [["Column1", "Column2"], [1, 2], [3, 4]], "hbond": [["ColumnA", "ColumnB"], ["A", "B"], ["C", "D"]], } + def test_create_df_from_binding_site(): # Test with valid interaction type - df = create_df_from_binding_site(sample_interactions, interaction_type="hydrophobic") + df = create_df_from_binding_site( + sample_interactions, interaction_type="hydrophobic" + ) assert isinstance(df, pd.DataFrame) assert df.shape == (2, 2) assert list(df.columns) == ["Column1", "Column2"] @@ -67,7 +74,9 @@ def test_create_df_from_binding_site(): assert list(df_default.columns) == ["ColumnA", "ColumnB"] # Test with an invalid interaction type (should default to 'hbond') - df_invalid = create_df_from_binding_site(sample_interactions, interaction_type="invalid_type") + df_invalid = create_df_from_binding_site( 
+ sample_interactions, interaction_type="invalid_type" + ) assert isinstance(df_invalid, pd.DataFrame) assert df_invalid.shape == (2, 2) assert list(df_invalid.columns) == ["ColumnA", "ColumnB"] @@ -76,7 +85,7 @@ def test_create_df_from_binding_site(): @pytest.fixture def input_pdb_filename(tmp_path): input_pdb_filename = tmp_path / "input.pdb" - + # Create a mock PDB file with 10 atoms input_pdb_content = """ATOM 1 N UNK A 454 43.493 48.319 35.835 1.00 0.00 A N ATOM 2 N1 UNK A 454 44.740 47.862 35.697 1.00 0.00 A N @@ -92,19 +101,20 @@ def input_pdb_filename(tmp_path): input_pdb_filename.write_text(input_pdb_content) return input_pdb_filename + def test_change_lig_to_residue(): topology_file = f"{test_data_directory}/complex.pdb" - shutil.copy(str(topology_file), '.') + shutil.copy(str(topology_file), ".") topology_file = "complex.pdb" # Change ligand to residue - change_lig_to_residue(str(topology_file), 'UNK', 'NEW') + change_lig_to_residue(str(topology_file), "UNK", "NEW") # Read the output PDB file and check if residues are modified - with open(topology_file, 'r') as output_file: + with open(topology_file, "r") as output_file: modified_lines = output_file.readlines() - assert any('NEW' in line for line in modified_lines) - assert all('UNK' not in line for line in modified_lines) + assert any("NEW" in line for line in modified_lines) + assert all("UNK" not in line for line in modified_lines) def test_process_frame_with_sample_data(): @@ -122,7 +132,10 @@ def test_process_frame_with_sample_data(): result = process_frame(frame_number, sample_universe, lig_name) # Define the expected columns you want to check - expected_columns = ["FRAME", "INTERACTION"] # Add the specific columns you want to validate + expected_columns = [ + "FRAME", + "INTERACTION", + ] # Add the specific columns you want to validate # Check if the result is a Pandas DataFrame assert isinstance(result, pd.DataFrame) @@ -135,24 +148,32 @@ def test_process_frame_with_sample_data(): def 
test_process_frame_with_sample_data_special(): # Define a sample frame number frame_number = 1 - special='HEM' + special = "HEM" destination_file = "processing_frame_1.pdb" destination_file_complex = "complex.pdb" - + shutil.copy(frame_file, destination_file) - shutil.copy(str(ligand_special), '.') - shutil.copy(str(topology_metal), '.') + shutil.copy(str(ligand_special), ".") + shutil.copy(str(topology_metal), ".") shutil.copy(topology_metal, destination_file_complex) # Load the sample PDB file into an MDAnalysis Universe sample_universe = mda.Universe(topology_metal, trajetory_metal) # Call the process_frame function with the sample data for special ligand 'HEM' - result_special = process_frame(frame_number, sample_universe, lig_name, special='HEM') + result_special = process_frame( + frame_number, sample_universe, lig_name, special="HEM" + ) # Define the expected columns you want to check for special ligand 'HEM' - expected_columns_special = ["FRAME", "INTERACTION", "TARGET_IDX", "RESTYPE", "LOCATION"] # Add specific columns for special ligand 'HEM' + expected_columns_special = [ + "FRAME", + "INTERACTION", + "TARGET_IDX", + "RESTYPE", + "LOCATION", + ] # Add specific columns for special ligand 'HEM' # Check if the result is a Pandas DataFrame for special ligand 'HEM' assert isinstance(result_special, pd.DataFrame) @@ -160,7 +181,7 @@ def test_process_frame_with_sample_data_special(): # Check if all expected columns are present in the result for special ligand 'HEM' for column in expected_columns_special: assert column in result_special.columns - + shutil.copy(topology_file, destination_file_complex) @@ -178,10 +199,16 @@ def test_process_frame_with_sample_data_peptide(): sample_universe = mda.Universe(topology_file) # Call the process_frame function with the sample data for peptide - result_peptide = process_frame(frame_number, sample_universe, lig_name, peptide='X', special=None) + result_peptide = process_frame( + frame_number, sample_universe, lig_name, 
peptide="X", special=None + ) # Define the expected columns you want to check for peptide - expected_columns_peptide = ["FRAME", "INTERACTION", "TARGET_IDX"] # Add specific columns for peptide + expected_columns_peptide = [ + "FRAME", + "INTERACTION", + "TARGET_IDX", + ] # Add specific columns for peptide # Check if the result is a Pandas DataFrame for peptide assert isinstance(result_peptide, pd.DataFrame) @@ -194,18 +221,35 @@ def test_process_frame_with_sample_data_peptide(): def test_process_trajectory(): topology_file = f"{test_data_directory}/0_unk_hoh.pdb" trajectory_file = f"{test_data_directory}/all_50.dcd" - pdb_md = mda.Universe(topology_file,trajectory_file) + pdb_md = mda.Universe(topology_file, trajectory_file) dataframe = None num_processes = 2 lig_name = "UNK" - interaction_list = pd.DataFrame(columns=["RESNR", "RESTYPE", "RESCHAIN", "RESNR_LIG", "RESTYPE_LIG", "RESCHAIN_LIG", "DIST", "LIGCARBONIDX", "PROTCARBONIDX", "LIGCOO", "PROTCOO"]) - - interaction_list = process_trajectory(pdb_md, dataframe, num_processes, lig_name, special_ligand=None, peptide=None) + interaction_list = pd.DataFrame( + columns=[ + "RESNR", + "RESTYPE", + "RESCHAIN", + "RESNR_LIG", + "RESTYPE_LIG", + "RESCHAIN_LIG", + "DIST", + "LIGCARBONIDX", + "PROTCARBONIDX", + "LIGCOO", + "PROTCOO", + ] + ) + + interaction_list = process_trajectory( + pdb_md, dataframe, num_processes, lig_name, special_ligand=None, peptide=None + ) assert interaction_list is not None assert len(interaction_list) > 10 - + + def test_process_frame_special_with_files(): test_data_directory = "openmmdl/tests/data/in" # Replace with the actual path to your test data directory topology_metal = f"{test_data_directory}/metal_top.pdb" @@ -213,6 +257,7 @@ def test_process_frame_special_with_files(): # Load PDB and DCD files using mdanalysis.Universe import MDAnalysis as mda + u = mda.Universe(topology_metal, trajetory_metal) lig_name = "UNK" # Replace with the actual ligand name @@ -229,19 +274,20 @@ def 
test_process_frame_special_with_files(): # Clean up any temporary files created during the test for frame in range(len(u.trajectory)): - temp_file = f'processing_frame_{frame}.pdb' + temp_file = f"processing_frame_{frame}.pdb" if os.path.exists(temp_file): os.remove(temp_file) + def test_process_frame_wrapper(): test_data_directory = "openmmdl/tests/data/in" # Replace with the actual path to your test data directory topology_metal = f"{test_data_directory}/metal_top.pdb" trajetory_metal = f"{test_data_directory}/metal_traj_25.dcd" ligand_special = f"{test_data_directory}/ligand_special.pdb" - shutil.copy(str(topology_metal), '.') - shutil.copy(str(trajetory_metal), '.') - shutil.copy(str(ligand_special), '.') + shutil.copy(str(topology_metal), ".") + shutil.copy(str(trajetory_metal), ".") + shutil.copy(str(ligand_special), ".") topology_metal = "metal_top.pdb" trajetory_metal = "metal_traj_25.dcd" @@ -263,39 +309,48 @@ def test_process_frame_wrapper(): def test_fill_missing_frames(): # Test Case 1: Basic functionality - data = {'FRAME': [1, 2, 4, 5], 'Value1': ['A', 'B', 'C', 'D']} + data = {"FRAME": [1, 2, 4, 5], "Value1": ["A", "B", "C", "D"]} df = pd.DataFrame(data) md_len = 6 filled_df = fill_missing_frames(df, md_len) - assert all(filled_df['FRAME'] == [1, 2, 3, 4, 5]) - assert all(filled_df.loc[filled_df['FRAME'] == 3, 'Value1'] == 'skip') + assert all(filled_df["FRAME"] == [1, 2, 3, 4, 5]) + assert all(filled_df.loc[filled_df["FRAME"] == 3, "Value1"] == "skip") # Test Case 4: No missing frames - no_missing_frames_data = {'FRAME': [1, 2, 3, 4, 5, 6], 'Value1': ['A', 'B', 'C', 'D', 'E', 'F']} + no_missing_frames_data = { + "FRAME": [1, 2, 3, 4, 5, 6], + "Value1": ["A", "B", "C", "D", "E", "F"], + } no_missing_frames_df = pd.DataFrame(no_missing_frames_data) filled_no_missing_frames_df = fill_missing_frames(no_missing_frames_df, md_len=6) - assert all(filled_no_missing_frames_df['FRAME'] == [1, 2, 3, 4, 5, 6]) # Should remain unchanged + assert all( + 
filled_no_missing_frames_df["FRAME"] == [1, 2, 3, 4, 5, 6] + ) # Should remain unchanged # Test Case 5: DataFrame with additional columns - data_with_extra_columns = {'FRAME': [1, 2, 4, 5], 'Value1': ['A', 'B', 'C', 'D'], 'Value2': [10, 20, 30, 40]} + data_with_extra_columns = { + "FRAME": [1, 2, 4, 5], + "Value1": ["A", "B", "C", "D"], + "Value2": [10, 20, 30, 40], + } df_with_extra_columns = pd.DataFrame(data_with_extra_columns) # Ensure the original DataFrame has unique frame numbers - assert df_with_extra_columns['FRAME'].nunique() == len(df_with_extra_columns) + assert df_with_extra_columns["FRAME"].nunique() == len(df_with_extra_columns) filled_df_extra_columns = fill_missing_frames(df_with_extra_columns, md_len=6) expected_frames = [1, 2, 3, 4, 5] # Debugging prints - print(f'Original DataFrame length: {len(df_with_extra_columns)}') - print(f'Filled DataFrame length: {len(filled_df_extra_columns)}') - print(f'Expected frames: {expected_frames}') + print(f"Original DataFrame length: {len(df_with_extra_columns)}") + print(f"Filled DataFrame length: {len(filled_df_extra_columns)}") + print(f"Expected frames: {expected_frames}") # Assert that the resulting DataFrame has unique frame numbers - assert filled_df_extra_columns['FRAME'].nunique() == len(filled_df_extra_columns) + assert filled_df_extra_columns["FRAME"].nunique() == len(filled_df_extra_columns) # Assert that the resulting DataFrame has the expected frames - assert all(filled_df_extra_columns['FRAME'] == expected_frames) + assert all(filled_df_extra_columns["FRAME"] == expected_frames) # Assert that the length of the resulting DataFrame is equal to the length of expected frames assert len(filled_df_extra_columns) == len(expected_frames) diff --git a/openmmdl/tests/openmmdl_analysis/test_preprocessing.py b/openmmdl/tests/openmmdl_analysis/test_preprocessing.py index 4bd4cb19..16a39d39 100644 --- a/openmmdl/tests/openmmdl_analysis/test_preprocessing.py +++ 
b/openmmdl/tests/openmmdl_analysis/test_preprocessing.py @@ -107,51 +107,6 @@ def test_process_pdb_file(): assert "UNK" in modified_data -def test_convert_pdb_to_sdf(tmp_path): - input_pdb_filename = tmp_path / "input.pdb" - output_sdf_filename = tmp_path / "output.sdf" - - # Create a mock PDB file - input_pdb_filename.write_text( - """ATOM 1 N UNK A 454 43.493 48.319 35.835 1.00 0.00 A N -ATOM 2 N1 UNK A 454 44.740 47.862 35.697 1.00 0.00 A N -ATOM 3 C14 UNK A 454 44.608 46.866 34.829 1.00 0.00 A C -ATOM 4 N2 UNK A 454 43.265 46.644 34.450 1.00 0.00 A N -ATOM 5 C7 UNK A 454 42.607 47.556 35.077 1.00 0.00 A C -ATOM 6 H5 UNK A 454 41.542 47.701 34.954 1.00 0.00 A H -ATOM 7 H10 UNK A 454 45.308 46.132 34.453 1.00 0.00 A H -ATOM 8 C UNK A 454 43.168 49.513 36.656 1.00 0.00 A C -ATOM 9 C2 UNK A 454 42.743 50.705 35.818 1.00 0.00 A C -ATOM 10 C4 UNK A 454 43.545 51.052 34.671 1.00 0.00 A C -ATOM 11 C9 UNK A 454 43.171 52.151 33.897 1.00 0.00 A C -ATOM 12 C13 UNK A 454 42.090 52.924 34.222 1.00 0.00 A C -ATOM 13 C11 UNK A 454 41.393 52.671 35.378 1.00 0.00 A C -ATOM 14 C6 UNK A 454 41.793 51.635 36.268 1.00 0.00 A C -ATOM 15 H4 UNK A 454 41.220 51.358 37.148 1.00 0.00 A H -ATOM 16 H9 UNK A 454 40.518 53.291 35.552 1.00 0.00 A H -ATOM 17 C16 UNK A 454 41.790 54.079 33.432 1.00 0.00 A C -ATOM 18 N4 UNK A 454 41.594 54.934 32.652 1.00 0.00 A N -ATOM 19 H7 UNK A 454 43.694 52.248 32.951 1.00 0.00 A H -ATOM 20 H2 UNK A 454 44.333 50.369 34.369 1.00 0.00 A H -ATOM 21 H UNK A 454 44.108 49.790 37.148 1.00 0.00 A H -ATOM 22 C1 UNK A 454 42.146 49.054 37.737 1.00 0.00 A C -ATOM 23 C5 UNK A 454 42.675 48.761 39.003 1.00 0.00 A C -ATOM 24 C10 UNK A 454 41.859 48.278 39.998 1.00 0.00 A C -ATOM 25 H8 UNK A 454 42.284 48.099 40.981 1.00 0.00 A H -ATOM 26 H3 UNK A 454 43.752 48.806 39.135 1.00 0.00 A H -ATOM 27 C3 UNK A 454 40.774 48.885 37.463 1.00 0.00 A C -ATOM 28 H1 UNK A 454 40.310 49.079 36.500 1.00 0.00 A H -ATOM 29 C8 UNK A 454 39.907 48.435 38.509 1.00 0.00 A C -ATOM 30 H6 UNK 
A 454 38.833 48.310 38.406 1.00 0.00 A H -ATOM 31 C12 UNK A 454 40.466 48.125 39.823 1.00 0.00 A C -ATOM 32 C15 UNK A 454 39.627 47.605 40.833 1.00 0.00 A C -ATOM 33 N3 UNK A 454 38.981 47.235 41.740 1.00 0.00 A N""" - ) - - convert_pdb_to_sdf(str(input_pdb_filename), str(output_sdf_filename)) - assert output_sdf_filename.exists() - - def test_renumber_atoms_in_residues(sample_pdb_data, tmp_path): input_pdb_filename = tmp_path / "input.pdb" output_pdb_filename = tmp_path / "output.pdb" @@ -197,59 +152,6 @@ def test_renumber_atoms_in_residues(sample_pdb_data, tmp_path): assert output_pdb_filename.exists() -@pytest.fixture -def pdb_file(tmpdir): - # Create a temporary PDB file for testing (truncated for brevity) - pdb_content = """ATOM 1 N UNK A 454 43.493 48.319 35.835 1.00 0.00 A N -ATOM 2 N1 UNK A 454 44.740 47.862 35.697 1.00 0.00 A N -ATOM 3 C14 UNK A 454 44.608 46.866 34.829 1.00 0.00 A C -ATOM 4 N2 UNK A 454 43.265 46.644 34.450 1.00 0.00 A N -ATOM 5 C7 UNK A 454 42.607 47.556 35.077 1.00 0.00 A C -ATOM 6 H5 UNK A 454 41.542 47.701 34.954 1.00 0.00 A H -ATOM 7 H10 UNK A 454 45.308 46.132 34.453 1.00 0.00 A H -ATOM 8 C UNK A 454 43.168 49.513 36.656 1.00 0.00 A C -ATOM 9 C2 UNK A 454 42.743 50.705 35.818 1.00 0.00 A C -ATOM 10 C4 UNK A 454 43.545 51.052 34.671 1.00 0.00 A C -ATOM 11 C9 UNK A 454 43.171 52.151 33.897 1.00 0.00 A C -ATOM 12 C13 UNK A 454 42.090 52.924 34.222 1.00 0.00 A C -ATOM 13 C11 UNK A 454 41.393 52.671 35.378 1.00 0.00 A C -ATOM 14 C6 UNK A 454 41.793 51.635 36.268 1.00 0.00 A C -ATOM 15 H4 UNK A 454 41.220 51.358 37.148 1.00 0.00 A H -ATOM 16 H9 UNK A 454 40.518 53.291 35.552 1.00 0.00 A H -ATOM 17 C16 UNK A 454 41.790 54.079 33.432 1.00 0.00 A C -ATOM 18 N4 UNK A 454 41.594 54.934 32.652 1.00 0.00 A N -ATOM 19 H7 UNK A 454 43.694 52.248 32.951 1.00 0.00 A H -ATOM 20 H2 UNK A 454 44.333 50.369 34.369 1.00 0.00 A H -ATOM 21 H UNK A 454 44.108 49.790 37.148 1.00 0.00 A H -ATOM 22 C1 UNK A 454 42.146 49.054 37.737 1.00 0.00 A C -ATOM 23 C5 
UNK A 454 42.675 48.761 39.003 1.00 0.00 A C -ATOM 24 C10 UNK A 454 41.859 48.278 39.998 1.00 0.00 A C -ATOM 25 H8 UNK A 454 42.284 48.099 40.981 1.00 0.00 A H -ATOM 26 H3 UNK A 454 43.752 48.806 39.135 1.00 0.00 A H -ATOM 27 C3 UNK A 454 40.774 48.885 37.463 1.00 0.00 A C -ATOM 28 H1 UNK A 454 40.310 49.079 36.500 1.00 0.00 A H -ATOM 29 C8 UNK A 454 39.907 48.435 38.509 1.00 0.00 A C -ATOM 30 H6 UNK A 454 38.833 48.310 38.406 1.00 0.00 A H -ATOM 31 C12 UNK A 454 40.466 48.125 39.823 1.00 0.00 A C -ATOM 32 C15 UNK A 454 39.627 47.605 40.833 1.00 0.00 A C -ATOM 33 N3 UNK A 454 38.981 47.235 41.740 1.00 0.00 A N """ - - pdb_path = tmpdir.join("test_input.pdb") - pdb_path.write(pdb_content) - return str(pdb_path) - - -def test_convert_pdb_to_sdf(pdb_file, tmpdir): - # Define the expected output SDF file path - expected_sdf_file = str(tmpdir.join("test_output.sdf")) - - # Call the function with the test PDB file and output SDF file - convert_pdb_to_sdf(pdb_file, expected_sdf_file) - - # Check if the output SDF file was created - assert os.path.isfile(expected_sdf_file) - - @pytest.fixture def sample_pdb_info(): return """ diff --git a/openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py b/openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py index 20d72954..574f8a7f 100644 --- a/openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py +++ b/openmmdl/tests/openmmdl_analysis/test_rdkit_figure_generation.py @@ -6,26 +6,36 @@ from pathlib import Path from openmmdl.openmmdl_analysis.rdkit_figure_generation import * -test_data_directory = Path("openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation") +test_data_directory = Path( + "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation" +) test_data_directory_files = Path("openmmdl/tests/data/in") -lig_no_h = test_data_directory_files / 'lig_no_h.pdb' -complex = test_data_directory_files / 'complex.pdb' -smi_file = test_data_directory_files / 'lig_no_h.smi' -current_directory = 
os.getcwd() -output_path = 'all_binding_modes_arranged.png' - -shutil.copy(str(lig_no_h), '.') -shutil.copy(str(complex), '.') - -@pytest.mark.parametrize("input_data, expected_output", [ - (["60GLUA_4206_4207_4216_4217_4218_4205_hydrophobic"], ['60GLUA 4206 4207 4216 4217 4218 4205 hydrophobic']), - (["165ASPA_4203_Acceptor_hbond"], ['165ASPA 4203 Acceptor hbond']), - (["125TYRA_4192_Acceptor_waterbridge"], ['125TYRA 4192 Acceptor waterbridge']), -]) +lig_no_h = test_data_directory_files / "lig_no_h.pdb" +complex = test_data_directory_files / "complex.pdb" +smi_file = test_data_directory_files / "lig_no_h.smi" +current_directory = os.getcwd() +output_path = "all_binding_modes_arranged.png" + +shutil.copy(str(lig_no_h), ".") +shutil.copy(str(complex), ".") + + +@pytest.mark.parametrize( + "input_data, expected_output", + [ + ( + ["60GLUA_4206_4207_4216_4217_4218_4205_hydrophobic"], + ["60GLUA 4206 4207 4216 4217 4218 4205 hydrophobic"], + ), + (["165ASPA_4203_Acceptor_hbond"], ["165ASPA 4203 Acceptor hbond"]), + (["125TYRA_4192_Acceptor_waterbridge"], ["125TYRA 4192 Acceptor waterbridge"]), + ], +) def test_split_interaction_data(input_data, expected_output): result = split_interaction_data(input_data) assert result == expected_output + def test_highlight_numbers(): # Input data split_data = [ @@ -41,16 +51,26 @@ def test_highlight_numbers(): "59ARGA 4194 F halogen", "166ARGA 4202,4203 Carboxylate NI saltbridge", "165ASPA 4202 Amine PI saltbridge", - "HEM 4202 FE 4 metal" + "HEM 4202 FE 4 metal", ] starting_idx = 1 # Updated starting index result = highlight_numbers(split_data, starting_idx) - highlighted_hbond_donor, highlighted_hbond_acceptor, highlighted_hbond_both, \ - highlighted_hydrophobic, highlighted_waterbridge, highlighted_pistacking, highlighted_halogen, \ - highlighted_ni, highlighted_pi, highlighted_pication, highlighted_metal = result + ( + highlighted_hbond_donor, + highlighted_hbond_acceptor, + highlighted_hbond_both, + highlighted_hydrophobic, + 
highlighted_waterbridge, + highlighted_pistacking, + highlighted_halogen, + highlighted_ni, + highlighted_pi, + highlighted_pication, + highlighted_metal, + ) = result assert highlighted_hbond_donor is not None assert highlighted_hbond_acceptor is not None @@ -59,74 +79,79 @@ def test_highlight_numbers(): assert highlighted_waterbridge is not None assert highlighted_halogen is not None assert highlighted_ni is not None - assert highlighted_pi is not None and len(highlighted_pi) > 0 + assert highlighted_pi is not None and len(highlighted_pi) > 0 assert highlighted_pication is not None assert highlighted_metal is not None - + + def test_update_dict(): # Test case 1: Check if the target dictionary is updated correctly - target_dict = {1: '1', 2: '2'} - source_dict = {3: '3', 4: '4'} + target_dict = {1: "1", 2: "2"} + source_dict = {3: "3", 4: "4"} update_dict(target_dict, source_dict) - assert target_dict == {1: '1', 2: '2', 3: '3', 4: '4'} + assert target_dict == {1: "1", 2: "2", 3: "3", 4: "4"} # Test case 2: Check if the function handles multiple source dictionaries target_dict = {} - source_dict1 = {1: '1'} - source_dict2 = {2: '2', 3: '3'} + source_dict1 = {1: "1"} + source_dict2 = {2: "2", 3: "3"} update_dict(target_dict, source_dict1, source_dict2) - assert target_dict == {1: '1', 2: '2', 3: '3'} + assert target_dict == {1: "1", 2: "2", 3: "3"} # Test case 3: Check if the function handles empty source dictionaries - target_dict = {1: '1', 2: '2'} + target_dict = {1: "1", 2: "2"} update_dict(target_dict) # No source dictionaries provided - assert target_dict == {1: '1', 2: '2'} + assert target_dict == {1: "1", 2: "2"} + def test_generate_interaction_dict(): # Test with a known interaction type 'hydrophobic' - interaction_type = 'hydrophobic' + interaction_type = "hydrophobic" keys = [1, 2, 3] - expected_result = { - 1: (1.0, 1.0, 0.0), - 2: (1.0, 1.0, 0.0), - 3: (1.0, 1.0, 0.0) - } + expected_result = {1: (1.0, 1.0, 0.0), 2: (1.0, 1.0, 0.0), 3: (1.0, 1.0, 0.0)} 
result = generate_interaction_dict(interaction_type, keys) assert result == expected_result + def test_create_and_merge_images_with_split_data(): # Define test data - binding_mode = 'Binding_Mode_1' + binding_mode = "Binding_Mode_1" occurrence_percent = 92 split_data = [ "166ARGA 4220,4221 Carboxylate NI saltbridge", "161PHEA 4221 Acceptor hbond", - "207ILEA 4205 4206 4207 4208 4209 4204 hydrophobic" + "207ILEA 4205 4206 4207 4208 4209 4204 hydrophobic", ] merged_image_paths = [] # Define source image paths - source_image_path = 'openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1.png' - source_svg_path = 'openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1.svg' - source_merged_image_path = 'openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1_merged.png' + source_image_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1.png" + source_svg_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1.svg" + source_merged_image_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1_merged.png" # Copy source image files to the working directory working_directory = os.getcwd() - destination_image_path = os.path.join(working_directory, os.path.basename(source_image_path)) - destination_svg_path = os.path.join(working_directory, os.path.basename(source_svg_path)) - destination_merged_image_path = os.path.join(working_directory, os.path.basename(source_merged_image_path)) + destination_image_path = os.path.join( + working_directory, os.path.basename(source_image_path) + ) + destination_svg_path = os.path.join( + working_directory, os.path.basename(source_svg_path) + ) + destination_merged_image_path = os.path.join( + working_directory, os.path.basename(source_merged_image_path) + ) shutil.copy(source_image_path, destination_image_path) shutil.copy(source_svg_path, destination_svg_path) 
shutil.copy(source_merged_image_path, destination_merged_image_path) - # Print the current files in the working directory for debugging files_in_working_directory = os.listdir(working_directory) print("Files in Working Directory before:", files_in_working_directory) # Run the function - merged_image_paths = create_and_merge_images(binding_mode, occurrence_percent, split_data, merged_image_paths) - + merged_image_paths = create_and_merge_images( + binding_mode, occurrence_percent, split_data, merged_image_paths + ) # Print the current files in the working directory for debugging files_in_working_directory = os.listdir(working_directory) @@ -146,8 +171,8 @@ def test_create_and_merge_images_with_split_data(): def test_max_width_and_height_calculation(): # Create some example images with different sizes - image1 = Image.new('RGB', (100, 200), (255, 255, 255)) - image2 = Image.new('RGB', (150, 250), (255, 255, 255)) + image1 = Image.new("RGB", (100, 200), (255, 255, 255)) + image2 = Image.new("RGB", (150, 250), (255, 255, 255)) merged_images = [image1, image2] # Calculate the maximum width and height @@ -158,10 +183,11 @@ def test_max_width_and_height_calculation(): assert max_width == 150 assert max_height == 250 + def test_big_figure_creation(): # Create example merged images - image1 = Image.new('RGB', (100, 200), (255, 255, 255)) - image2 = Image.new('RGB', (150, 250), (255, 255, 255)) + image1 = Image.new("RGB", (100, 200), (255, 255, 255)) + image2 = Image.new("RGB", (150, 250), (255, 255, 255)) merged_images = [image1, image2] # Calculate the maximum width and height @@ -177,24 +203,33 @@ def test_big_figure_creation(): total_height = max_height * num_rows # Create a new image with the calculated width and height - big_figure = Image.new('RGB', (total_width, total_height), (255, 255, 255)) # Set background to white + big_figure = Image.new( + "RGB", (total_width, total_height), (255, 255, 255) + ) # Set background to white # Assert the dimensions of the created 
big_figure assert big_figure.size == (300, 250) # Width should be 300, height should be 250 + def test_arranged_figure_generation(): - binding_mode1_path = 'openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1_merged.png' - binding_mode2_path = 'openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_2_merged.png' - all_modes_path = 'openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/all_binding_modes_arranged.png' + binding_mode1_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_1_merged.png" + binding_mode2_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/Binding_Mode_2_merged.png" + all_modes_path = "openmmdl/tests/data/openmmdl_analysis/rdkit_figure_generation/all_binding_modes_arranged.png" working_directory = os.getcwd() - + # Print the working directory to verify it's as expected print("Working Directory:", working_directory) - destination_path_1 = os.path.join(working_directory, os.path.basename(binding_mode1_path)) - destination_path_2 = os.path.join(working_directory, os.path.basename(binding_mode2_path)) - destination_path_all = os.path.join(working_directory, os.path.basename(all_modes_path)) - + destination_path_1 = os.path.join( + working_directory, os.path.basename(binding_mode1_path) + ) + destination_path_2 = os.path.join( + working_directory, os.path.basename(binding_mode2_path) + ) + destination_path_all = os.path.join( + working_directory, os.path.basename(all_modes_path) + ) + # Print the destination paths to verify they are constructed correctly print("Destination Path 1:", destination_path_1) print("Destination Path 2:", destination_path_2) @@ -203,9 +238,9 @@ def test_arranged_figure_generation(): shutil.copy(binding_mode1_path, destination_path_1) shutil.copy(binding_mode2_path, destination_path_2) shutil.copy(all_modes_path, destination_path_all) - - merged_image_paths = ['Binding_Mode_1_merged.png', 'Binding_Mode_2_merged.png'] - 
output_path = 'all_binding_modes_arranged.png' + + merged_image_paths = ["Binding_Mode_1_merged.png", "Binding_Mode_2_merged.png"] + output_path = "all_binding_modes_arranged.png" output_path = os.path.join(working_directory, output_path) print(output_path) @@ -217,11 +252,15 @@ def test_arranged_figure_generation(): files_in_working_directory = os.listdir(working_directory) print("Files in Working Directory:", files_in_working_directory) - output_path = os.path.join(working_directory, 'Binding_Modes_Markov_States', 'all_binding_modes_arranged.png') + output_path = os.path.join( + working_directory, + "Binding_Modes_Markov_States", + "all_binding_modes_arranged.png", + ) print(output_path) # Check if the output file was created - + assert output_path is not None @@ -232,16 +271,18 @@ def test_arranged_figure_generation(): shutil.copy(lig_no_h, Path.cwd()) shutil.copy(smi_file, Path.cwd()) + # Test the generate_ligand_image function def test_generate_ligand_image(): ligand_name = "UNK" - generate_ligand_image(ligand_name, "complex.pdb", "lig_no_h.pdb", "lig_no_h.smi", output_image_file) + generate_ligand_image( + ligand_name, "complex.pdb", "lig_no_h.pdb", "lig_no_h.smi", output_image_file + ) # Assert that the output image file exists assert os.path.exists(output_image_file) - # Run the tests -if __name__ == '__main__': +if __name__ == "__main__": pytest.main() diff --git a/openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py b/openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py index c7aae555..b1f0dc8c 100644 --- a/openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py +++ b/openmmdl/tests/openmmdl_analysis/test_rmsd_calculation.py @@ -6,7 +6,10 @@ import numpy as np import mdtraj as md -from openmmdl.openmmdl_analysis.rmsd_calculation import rmsd_for_atomgroups, RMSD_dist_frames +from openmmdl.openmmdl_analysis.rmsd_calculation import ( + rmsd_for_atomgroups, + RMSD_dist_frames, +) test_data_directory = Path("openmmdl/tests/data/in") topology_file = 
f"{test_data_directory}/0_unk_hoh.pdb" @@ -15,15 +18,18 @@ selection2 = ["resname UNK"] ligand_name = "UNK" + def test_rmsd_for_atomgroups(): # Call the function - rmsd_df = rmsd_for_atomgroups(topology_file, trajectory_file, selection1, selection2) + rmsd_df = rmsd_for_atomgroups( + topology_file, trajectory_file, selection1, selection2 + ) # Check if the output DataFrame has the correct structure assert isinstance(rmsd_df, pd.DataFrame) assert rmsd_df.index.name == "frame" - + # Define file paths csv_path = os.path.join("RMSD", "RMSD_over_time.csv") plot_path = os.path.join("RMSD", "RMSD_over_time.png") @@ -31,27 +37,30 @@ def test_rmsd_for_atomgroups(): print("Checking CSV file:", csv_path) # Check if the CSV file exists assert os.path.exists(csv_path), f"CSV file does not exist at {csv_path}" - + print("Checking plot file:", plot_path) # Check if the plot file exists assert os.path.exists(plot_path), f"Plot file does not exist at {plot_path}" - + # Cleanup created files after the test os.remove(csv_path) os.remove(plot_path) + def test_rmsd_dist_frames(): # Call the function - pairwise_rmsd_prot, pairwise_rmsd_lig = RMSD_dist_frames(topology_file, trajectory_file, ligand_name) + pairwise_rmsd_prot, pairwise_rmsd_lig = RMSD_dist_frames( + topology_file, trajectory_file, ligand_name + ) # Check if the function returns numpy arrays for pairwise RMSD assert isinstance(pairwise_rmsd_prot, np.ndarray) assert isinstance(pairwise_rmsd_lig, np.ndarray) # Define file paths - output_directory = 'RMSD' - plot_path = os.path.join(output_directory, 'RMSD_between_the_frames.png') + output_directory = "RMSD" + plot_path = os.path.join(output_directory, "RMSD_between_the_frames.png") print("Checking plot file:", plot_path) # Check if the plot file exists diff --git a/openmmdl/tests/openmmdl_analysis/visualization_functions_test.py b/openmmdl/tests/openmmdl_analysis/visualization_functions_test.py index f30274af..0b5e2aec 100644 --- 
a/openmmdl/tests/openmmdl_analysis/visualization_functions_test.py +++ b/openmmdl/tests/openmmdl_analysis/visualization_functions_test.py @@ -11,137 +11,175 @@ from openmmdl.openmmdl_analysis.visualization_functions import * test_data_directory_files = Path("openmmdl/tests/data/in") -clouds = test_data_directory_files / 'clouds.json' -waters_pdb = test_data_directory_files / 'interacting_waters.pdb' -waters_dcd = test_data_directory_files / 'interacting_waters.dcd' -waters_pkl = test_data_directory_files / 'interacting_waters.pkl' +clouds = test_data_directory_files / "clouds.json" +waters_pdb = test_data_directory_files / "interacting_waters.pdb" +waters_dcd = test_data_directory_files / "interacting_waters.dcd" +waters_pkl = test_data_directory_files / "interacting_waters.pkl" + # visualization_functions tests @pytest.fixture def sample_dataframe_interacting_water_ids(): data = { - 'Interaction1': [0, 1, 0, 1, 0], - 'Interaction2': [1, 0, 0, 0, 1], - 'WATER_IDX': [101, 102, None, 104, 105], - 'FRAME': [1, 2, 3, 4, 5] + "Interaction1": [0, 1, 0, 1, 0], + "Interaction2": [1, 0, 0, 0, 1], + "WATER_IDX": [101, 102, None, 104, 105], + "FRAME": [1, 2, 3, 4, 5], } df_all = pd.DataFrame(data) return df_all + def test_interacting_water_ids(sample_dataframe_interacting_water_ids): - waterbridge_interactions = ['Interaction1', 'Interaction2'] - - result = interacting_water_ids(sample_dataframe_interacting_water_ids, waterbridge_interactions) + waterbridge_interactions = ["Interaction1", "Interaction2"] + + result = interacting_water_ids( + sample_dataframe_interacting_water_ids, waterbridge_interactions + ) expected_interacting_waters = [101, 102, 104, 105] assert sorted(result) == sorted(expected_interacting_waters) - + @pytest.fixture def sample_dataframe_cloud_json_generation(): data = { - 'LIGCOO': [ + "LIGCOO": [ "(1.0, 2.0, 3.0)", "(4.0, 5.0, 6.0)", "(7.0, 8.0, 9.0)", ], - 'INTERACTION': [ - 'hydrophobic', - 'acceptor', - 'donor', + "INTERACTION": [ + "hydrophobic", + 
"acceptor", + "donor", ], - 'PROTISDON': [ - 'False', - 'True', - 'False', + "PROTISDON": [ + "False", + "True", + "False", ], - 'PROTISPOS': [ - 'False', - 'False', - 'True', + "PROTISPOS": [ + "False", + "False", + "True", ], } df_all = pd.DataFrame(data) return df_all + def test_run_visualization(): # Set up the paths package_path = Path("openmmdl/openmmdl_analysis") - notebook_path = package_path / "visualization.ipynb" - + notebook_path = package_path / "visualization.ipynb" + # Run the visualization function # run_visualization() - + # Check if the notebook was copied to the current directory with the correct name - copied_notebook_path = os.path.join(os.getcwd(), 'visualization.ipynb') - shutil.copy(str(notebook_path), '.') - new_notebook_path = 'visualization.ipynb' + copied_notebook_path = os.path.join(os.getcwd(), "visualization.ipynb") + shutil.copy(str(notebook_path), ".") + new_notebook_path = "visualization.ipynb" assert os.path.isfile(copied_notebook_path) - + # Check if the content of the copied notebook is the same as the original notebook - with open(new_notebook_path, 'r') as copied_notebook: - with open(notebook_path, 'r') as original_notebook: + with open(new_notebook_path, "r") as copied_notebook: + with open(notebook_path, "r") as original_notebook: assert copied_notebook.read() == original_notebook.read() + @pytest.fixture def sample_dataframe(): # Create a sample dataframe for testing data = { - 'LIGCOO': ['(1.0, 2.0, 3.0)', '(4.0, 5.0, 6.0)', '(13.0, 14.0, 15.0)', '(16.0, 17.0, 18.0)', '(19.0, 20.0, 21.0)'], - 'INTERACTION': ['hydrophobic', 'acceptor', 'donor', 'pistacking', 'pication'], - 'PROTISDON': ['False', 'True', 'True', 'False', 'True'], - 'PROTISPOS': ['False', 'True', 'False', 'False', 'False'], - 'TARGETCOO': ['(7.0, 8.0, 9.0)', '(10.0, 11.0, 12.0)', '(22.0, 23.0, 24.0)', '(25.0, 26.0, 27.0)', '(28.0, 29.0, 30.0)'], + "LIGCOO": [ + "(1.0, 2.0, 3.0)", + "(4.0, 5.0, 6.0)", + "(13.0, 14.0, 15.0)", + "(16.0, 17.0, 18.0)", + "(19.0, 
20.0, 21.0)", + ], + "INTERACTION": ["hydrophobic", "acceptor", "donor", "pistacking", "pication"], + "PROTISDON": ["False", "True", "True", "False", "True"], + "PROTISPOS": ["False", "True", "False", "False", "False"], + "TARGETCOO": [ + "(7.0, 8.0, 9.0)", + "(10.0, 11.0, 12.0)", + "(22.0, 23.0, 24.0)", + "(25.0, 26.0, 27.0)", + "(28.0, 29.0, 30.0)", + ], } - + # Extend the existing dataframe with examples for additional interactions additional_data = [ - ('(31.0, 32.0, 33.0)', 'waterbridge', 'True', 'False', '(34.0, 35.0, 36.0)'), - ('(37.0, 38.0, 39.0)', 'negative_ionizable', 'False', 'True', '(40.0, 41.0, 42.0)'), - ('(43.0, 44.0, 45.0)', 'positive_ionizable', 'False', 'True', '(46.0, 47.0, 48.0)'), - ('(49.0, 50.0, 51.0)', 'halogen', 'False', 'False', '(52.0, 53.0, 54.0)'), - ('(55.0, 56.0, 57.0)', 'metal', 'False', 'False', '(58.0, 59.0, 60.0)'), - ('(61.0, 62.0, 63.0)', 'hydrophobic', 'False', 'False', '(64.0, 65.0, 66.0)'), - ('(61.0, 62.0, 63.0)', 'saltbridge', 'False', 'True', '(64.0, 65.0, 66.0)'), - ('(61.0, 62.0, 63.0)', 'saltbridge', 'False', 'False', '(64.0, 65.0, 66.0)'), - ('(67.0, 68.0, 69.0)', 'donor', 'True', 'False', '(70.0, 71.0, 72.0)'), - ('(73.0, 74.0, 75.0)', 'acceptor', 'False', 'False', '(76.0, 77.0, 78.0)'), - ('(79.0, 80.0, 81.0)', 'negative_ionizable', 'False', 'True', '(82.0, 83.0, 84.0)'), + ("(31.0, 32.0, 33.0)", "waterbridge", "True", "False", "(34.0, 35.0, 36.0)"), + ( + "(37.0, 38.0, 39.0)", + "negative_ionizable", + "False", + "True", + "(40.0, 41.0, 42.0)", + ), + ( + "(43.0, 44.0, 45.0)", + "positive_ionizable", + "False", + "True", + "(46.0, 47.0, 48.0)", + ), + ("(49.0, 50.0, 51.0)", "halogen", "False", "False", "(52.0, 53.0, 54.0)"), + ("(55.0, 56.0, 57.0)", "metal", "False", "False", "(58.0, 59.0, 60.0)"), + ("(61.0, 62.0, 63.0)", "hydrophobic", "False", "False", "(64.0, 65.0, 66.0)"), + ("(61.0, 62.0, 63.0)", "saltbridge", "False", "True", "(64.0, 65.0, 66.0)"), + ("(61.0, 62.0, 63.0)", "saltbridge", "False", "False", 
"(64.0, 65.0, 66.0)"), + ("(67.0, 68.0, 69.0)", "donor", "True", "False", "(70.0, 71.0, 72.0)"), + ("(73.0, 74.0, 75.0)", "acceptor", "False", "False", "(76.0, 77.0, 78.0)"), + ( + "(79.0, 80.0, 81.0)", + "negative_ionizable", + "False", + "True", + "(82.0, 83.0, 84.0)", + ), ] - + for row in additional_data: - data['LIGCOO'].append(row[0]) - data['INTERACTION'].append(row[1]) - data['PROTISDON'].append(row[2]) - data['PROTISPOS'].append(row[3]) - data['TARGETCOO'].append(row[4]) - + data["LIGCOO"].append(row[0]) + data["INTERACTION"].append(row[1]) + data["PROTISDON"].append(row[2]) + data["PROTISPOS"].append(row[3]) + data["TARGETCOO"].append(row[4]) + return pd.DataFrame(data) - + + def test_cloud_json_generation(sample_dataframe): result = cloud_json_generation(sample_dataframe) - assert 'hydrophobic' in result - assert 'acceptor' in result - assert 'donor' in result - assert 'waterbridge' in result - assert 'negative_ionizable' in result - assert 'positive_ionizable' in result - assert 'pistacking' in result - assert 'pication' in result - assert 'halogen' in result - assert 'metal' in result + assert "hydrophobic" in result + assert "acceptor" in result + assert "donor" in result + assert "waterbridge" in result + assert "negative_ionizable" in result + assert "positive_ionizable" in result + assert "pistacking" in result + assert "pication" in result + assert "halogen" in result + assert "metal" in result # Add more specific assertions based on your expectations for the output # For example, you might want to check the structure of the generated dictionary - assert isinstance(result['hydrophobic'], dict) - assert 'coordinates' in result['hydrophobic'] - assert 'color' in result['hydrophobic'] - assert 'radius' in result['hydrophobic'] + assert isinstance(result["hydrophobic"], dict) + assert "coordinates" in result["hydrophobic"] + assert "color" in result["hydrophobic"] + assert "radius" in result["hydrophobic"] # Add more tests based on your specific 
requirements and expected results + @pytest.fixture def input_paths(): test_data_directory = Path("openmmdl/tests/data/in") @@ -151,6 +189,7 @@ def input_paths(): trajetory_metal = f"{test_data_directory}/metal_traj_25.dcd" return topology_metal, trajetory_metal + def test_save_interacting_waters_trajectory(input_paths): topology_metal, trajetory_metal = input_paths interacting_waters = [588, 733, 1555, 2000, 1266] @@ -158,7 +197,14 @@ def test_save_interacting_waters_trajectory(input_paths): special = "HEM" outputpath = "./" - save_interacting_waters_trajectory(topology_metal, trajetory_metal, interacting_waters, ligname, special, outputpath) + save_interacting_waters_trajectory( + topology_metal, + trajetory_metal, + interacting_waters, + ligname, + special, + outputpath, + ) interacting_water_pdb = "interacting_waters.pdb" interacting_water_dcd = "interacting_waters.dcd" @@ -176,10 +222,10 @@ def test_save_interacting_waters_trajectory(input_paths): def test_visualization(): - shutil.copy(str(clouds), '.') - shutil.copy(str(waters_pdb), '.') - shutil.copy(str(waters_dcd), '.') - shutil.copy(str(waters_pkl), '.') + shutil.copy(str(clouds), ".") + shutil.copy(str(waters_pdb), ".") + shutil.copy(str(waters_dcd), ".") + shutil.copy(str(waters_pkl), ".") ligand_name = "LET" receptor_type = "protein" height = "1000px" diff --git a/openmmdl/tests/openmmdl_simulation/cleaning_procedures_test.py b/openmmdl/tests/openmmdl_simulation/cleaning_procedures_test.py index 9da7bc2f..a3967ff3 100644 --- a/openmmdl/tests/openmmdl_simulation/cleaning_procedures_test.py +++ b/openmmdl/tests/openmmdl_simulation/cleaning_procedures_test.py @@ -3,26 +3,36 @@ import pytest from pathlib import Path from unittest.mock import mock_open, patch -from openmmdl.openmmdl_simulation.scripts.cleaning_procedures import cleanup, create_directory_if_not_exists, post_md_file_movement, copy_file, create_directory_if_not_exists, organize_files +from 
openmmdl.openmmdl_simulation.scripts.cleaning_procedures import ( + cleanup, + create_directory_if_not_exists, + post_md_file_movement, + copy_file, + create_directory_if_not_exists, + organize_files, +) + @pytest.fixture def test_protein_name(): return "test_protein" + @pytest.fixture def test_directory_path(): return "test_directory" + def test_cleanup(test_protein_name): # Create a dummy file to be removed - with open(f'output_{test_protein_name}', 'w') as dummy_file: + with open(f"output_{test_protein_name}", "w") as dummy_file: dummy_file.write("Dummy content") # Call the cleanup function cleanup(test_protein_name) # Check if the file has been removed - assert not os.path.exists(f'output_{test_protein_name}') + assert not os.path.exists(f"output_{test_protein_name}") def test_create_directory_if_not_exists(test_directory_path): @@ -43,7 +53,7 @@ def test_create_directory_if_not_exists(test_directory_path): @patch("os.path.exists") @patch("shutil.copy") def test_copy_file(mock_copy, mock_exists): - + src = "source_file.txt" dest = "destination_directory" @@ -59,6 +69,7 @@ def test_copy_file(mock_copy, mock_exists): # Check that shutil.copy was called with the source file and destination directory mock_copy.assert_called_with(src, dest) + # Mock the os.path.exists and os.rename functions @patch("os.path.exists") @patch("os.rename") @@ -76,7 +87,8 @@ def test_organize_files(mock_rename, mock_exists): for call in mock_rename.call_args_list: print(call) -#def test_post_md_file_movement(): + +# def test_post_md_file_movement(): # # Get the absolute path to the test data directory # test_data_directory = Path("openmmdl/tests/data/in") # @@ -85,7 +97,7 @@ def test_organize_files(mock_rename, mock_exists): # protein_name = test_data_directory / '6b73.pdb' # prmtop = test_data_directory / '6b73.prmtop' # inpcrd = test_data_directory / '6b73.inpcrd' -# +# # # Assert that the input files exist before moving # assert os.path.exists(ligand) # assert 
os.path.exists(protein_name) @@ -94,7 +106,7 @@ def test_organize_files(mock_rename, mock_exists): # # # Call the post_md_file_movement function # post_md_file_movement(protein_name, prmtop, inpcrd, ligand) -# +# # # Check if the files have been organized and moved to the correct directories # input_files_dir = Path("Input_Files") # @@ -104,25 +116,26 @@ def test_organize_files(mock_rename, mock_exists): # assert os.path.exists(input_files_dir / "6b73.inpcrd") # assert os.path.exists(input_files_dir / "CVV.sdf") + def test_post_md_file_movement(): # Get the absolute path to the test data directory test_data_directory = Path("openmmdl/tests/data/in") # Define the full path to the input files - ligand = test_data_directory / 'CVV.sdf' - protein_name = test_data_directory / '6b73.pdb' - prmtop = test_data_directory / '6b73.prmtop' - inpcrd = test_data_directory / '6b73.inpcrd' - protein_no_solvent = test_data_directory / 'prepared_no_solvent_6b73.pdb' - protein_solvent = test_data_directory / 'solvent_padding_6b73.pdb' - protein_equilibration = test_data_directory / 'Equilibration_6b73.pdb' - protein_minimization = test_data_directory / 'Energyminimization_6b73.pdb' - output_pdb = test_data_directory / 'output_6b73.pdb' - mdtraj_top = test_data_directory / 'centered_old_coordinates_top.pdb' - prot_lig_top = test_data_directory / 'prot_lig_top.pdb' - checkpoint = test_data_directory / 'checkpoint.chk' - checkpoint_10x = test_data_directory / '10x_checkpoint.chk' - + ligand = test_data_directory / "CVV.sdf" + protein_name = test_data_directory / "6b73.pdb" + prmtop = test_data_directory / "6b73.prmtop" + inpcrd = test_data_directory / "6b73.inpcrd" + protein_no_solvent = test_data_directory / "prepared_no_solvent_6b73.pdb" + protein_solvent = test_data_directory / "solvent_padding_6b73.pdb" + protein_equilibration = test_data_directory / "Equilibration_6b73.pdb" + protein_minimization = test_data_directory / "Energyminimization_6b73.pdb" + output_pdb = 
test_data_directory / "output_6b73.pdb" + mdtraj_top = test_data_directory / "centered_old_coordinates_top.pdb" + prot_lig_top = test_data_directory / "prot_lig_top.pdb" + checkpoint = test_data_directory / "checkpoint.chk" + checkpoint_10x = test_data_directory / "10x_checkpoint.chk" + # Assert that the input files exist before moving assert os.path.exists(ligand) assert os.path.exists(protein_name) @@ -130,21 +143,21 @@ def test_post_md_file_movement(): assert os.path.exists(inpcrd) assert os.path.exists(protein_no_solvent) - shutil.copy(str(protein_no_solvent), '.') - shutil.copy(str(protein_solvent), '.') - shutil.copy(str(protein_equilibration), '.') - shutil.copy(str(protein_minimization), '.') - shutil.copy(str(output_pdb), '.') - shutil.copy(str(mdtraj_top), '.') - shutil.copy(str(prot_lig_top), '.') - shutil.copy(str(checkpoint), '.') - shutil.copy(str(checkpoint_10x), '.') - shutil.copy(str(protein_name), '.') - protein_name = '6b73.pdb' + shutil.copy(str(protein_no_solvent), ".") + shutil.copy(str(protein_solvent), ".") + shutil.copy(str(protein_equilibration), ".") + shutil.copy(str(protein_minimization), ".") + shutil.copy(str(output_pdb), ".") + shutil.copy(str(mdtraj_top), ".") + shutil.copy(str(prot_lig_top), ".") + shutil.copy(str(checkpoint), ".") + shutil.copy(str(checkpoint_10x), ".") + shutil.copy(str(protein_name), ".") + protein_name = "6b73.pdb" # Call the post_md_file_movement function post_md_file_movement(str(protein_name), str(prmtop), str(inpcrd), [str(ligand)]) - + # Check if the files have been organized and moved to the correct directories input_files_dir = Path("Input_Files") md_files_dir = Path("MD_Files") @@ -156,14 +169,19 @@ def test_post_md_file_movement(): assert os.path.exists(md_files_dir / "Pre_MD") assert os.path.exists(md_files_dir / "Pre_MD" / "prepared_no_solvent_6b73.pdb") assert os.path.exists(md_files_dir / "Pre_MD" / "solvent_padding_6b73.pdb") - assert os.path.exists(md_files_dir / "Minimization_Equilibration" / 
"Equilibration_6b73.pdb") - assert os.path.exists(md_files_dir / "Minimization_Equilibration" / "Energyminimization_6b73.pdb") + assert os.path.exists( + md_files_dir / "Minimization_Equilibration" / "Equilibration_6b73.pdb" + ) + assert os.path.exists( + md_files_dir / "Minimization_Equilibration" / "Energyminimization_6b73.pdb" + ) assert os.path.exists(md_files_dir / "MD_Output" / "output_6b73.pdb") assert os.path.exists(md_postprocessing_dir / "centered_old_coordinates_top.pdb") assert os.path.exists(final_output_dir / "Prot_Lig" / "prot_lig_top.pdb") assert os.path.exists(checkpoints_dir / "checkpoint.chk") assert os.path.exists(checkpoints_dir / "10x_checkpoint.chk") + # Run the tests if __name__ == "__main__": pytest.main() diff --git a/openmmdl/tests/openmmdl_simulation/forcefield_water_test.py b/openmmdl/tests/openmmdl_simulation/forcefield_water_test.py index dcc0f4ce..cd0967f9 100644 --- a/openmmdl/tests/openmmdl_simulation/forcefield_water_test.py +++ b/openmmdl/tests/openmmdl_simulation/forcefield_water_test.py @@ -2,118 +2,173 @@ import simtk.openmm.app as app from openff.toolkit.topology import Molecule from openmmforcefields.generators import GAFFTemplateGenerator -from openmmdl.openmmdl_simulation.scripts.forcefield_water import ff_selection, water_forcefield_selection, water_model_selection, generate_forcefield, generate_transitional_forcefield +from openmmdl.openmmdl_simulation.scripts.forcefield_water import ( + ff_selection, + water_forcefield_selection, + water_model_selection, + generate_forcefield, + generate_transitional_forcefield, +) # Replace 'your_module' with the actual name of the module containing your functions. + @pytest.fixture def sample_rdkit_molecule(): """ A sample RDKit molecule for testing. 
""" from rdkit import Chem + mol = Chem.MolFromSmiles("CCO") return mol + def test_ff_selection(): - assert ff_selection('AMBER14') == 'amber14-all.xml' - assert ff_selection('AMBER99SB') == 'amber99sb.xml' - assert ff_selection('AMBER99SB-ILDN') == 'amber99sbildn.xml' - assert ff_selection('AMBER03') == 'amber03.xml' - assert ff_selection('AMBER10') == 'amber10.xml' - assert ff_selection('CHARMM36') == 'charmm36.xml' - assert ff_selection('NonexistentFF') is None + assert ff_selection("AMBER14") == "amber14-all.xml" + assert ff_selection("AMBER99SB") == "amber99sb.xml" + assert ff_selection("AMBER99SB-ILDN") == "amber99sbildn.xml" + assert ff_selection("AMBER03") == "amber03.xml" + assert ff_selection("AMBER10") == "amber10.xml" + assert ff_selection("CHARMM36") == "charmm36.xml" + assert ff_selection("NonexistentFF") is None + def test_water_forcefield_selection(): # Test cases for 'amber14-all.xml' force field - assert water_forcefield_selection('TIP3P', 'amber14-all.xml') == 'amber14/tip3p.xml' - assert water_forcefield_selection('TIP3P-FB', 'amber14-all.xml') == 'amber14/tip3pfb.xml' - assert water_forcefield_selection('SPC/E', 'amber14-all.xml') == 'amber14/spce.xml' - assert water_forcefield_selection('TIP4P-Ew', 'amber14-all.xml') == 'amber14/tip4pew.xml' - assert water_forcefield_selection('TIP4P-FB', 'amber14-all.xml') == 'amber14/tip4pfb.xml' - assert water_forcefield_selection('TIP5P', 'amber14-all.xml') is None - assert water_forcefield_selection('NonexistentWater', 'amber14-all.xml') is None - assert water_forcefield_selection('TIP3P', 'NonexistentFF') is None + assert water_forcefield_selection("TIP3P", "amber14-all.xml") == "amber14/tip3p.xml" + assert ( + water_forcefield_selection("TIP3P-FB", "amber14-all.xml") + == "amber14/tip3pfb.xml" + ) + assert water_forcefield_selection("SPC/E", "amber14-all.xml") == "amber14/spce.xml" + assert ( + water_forcefield_selection("TIP4P-Ew", "amber14-all.xml") + == "amber14/tip4pew.xml" + ) + assert ( + 
water_forcefield_selection("TIP4P-FB", "amber14-all.xml") + == "amber14/tip4pfb.xml" + ) + assert water_forcefield_selection("TIP5P", "amber14-all.xml") is None + assert water_forcefield_selection("NonexistentWater", "amber14-all.xml") is None + assert water_forcefield_selection("TIP3P", "NonexistentFF") is None # Test cases for 'charmm36.xml' force field - assert water_forcefield_selection('CHARMM default', 'charmm36.xml') == 'charmm36/water.xml' - assert water_forcefield_selection('TIP3P-PME-B', 'charmm36.xml') == 'charmm36/tip3p-pme-b.xml' - assert water_forcefield_selection('TIP3P-PME-F', 'charmm36.xml') == 'charmm36/tip3p-pme-f.xml' - assert water_forcefield_selection('SPC/E', 'charmm36.xml') == 'charmm36/spce.xml' - assert water_forcefield_selection('TIP4P-Ew', 'charmm36.xml') == 'charmm36/tip4pew.xml' - assert water_forcefield_selection('TIP4P-2005', 'charmm36.xml') == 'charmm36/tip4p2005.xml' - assert water_forcefield_selection('TIP5P', 'charmm36.xml') == 'charmm36/tip5p.xml' - assert water_forcefield_selection('TIP5P-Ew', 'charmm36.xml') == 'charmm36/tip5pew.xml' - assert water_forcefield_selection('NonexistentWater', 'charmm36.xml') is None - assert water_forcefield_selection('NonexistentFF', 'charmm36.xml') is None + assert ( + water_forcefield_selection("CHARMM default", "charmm36.xml") + == "charmm36/water.xml" + ) + assert ( + water_forcefield_selection("TIP3P-PME-B", "charmm36.xml") + == "charmm36/tip3p-pme-b.xml" + ) + assert ( + water_forcefield_selection("TIP3P-PME-F", "charmm36.xml") + == "charmm36/tip3p-pme-f.xml" + ) + assert water_forcefield_selection("SPC/E", "charmm36.xml") == "charmm36/spce.xml" + assert ( + water_forcefield_selection("TIP4P-Ew", "charmm36.xml") == "charmm36/tip4pew.xml" + ) + assert ( + water_forcefield_selection("TIP4P-2005", "charmm36.xml") + == "charmm36/tip4p2005.xml" + ) + assert water_forcefield_selection("TIP5P", "charmm36.xml") == "charmm36/tip5p.xml" + assert ( + water_forcefield_selection("TIP5P-Ew", 
"charmm36.xml") == "charmm36/tip5pew.xml" + ) + assert water_forcefield_selection("NonexistentWater", "charmm36.xml") is None + assert water_forcefield_selection("NonexistentFF", "charmm36.xml") is None + def test_water_model_selection(): - assert water_model_selection('TIP3P', 'amber99sb.xml') == 'tip3p' - assert water_model_selection('TIP3P', 'amber99sbildn.xml') == 'tip3p' - assert water_model_selection('TIP3P', 'amber03.xml') == 'tip3p' - assert water_model_selection('TIP3P', 'amber10.xml') == 'tip3p' - - assert water_model_selection('SPC/E', 'amber99sb.xml') == 'spce' - assert water_model_selection('SPC/E', 'amber99sbildn.xml') == 'spce' - assert water_model_selection('SPC/E', 'amber03.xml') == 'spce' - assert water_model_selection('SPC/E', 'amber10.xml') == 'spce' - - assert water_model_selection('TIP4P-Ew', 'amber99sb.xml') == 'tip4pew' - assert water_model_selection('TIP4P-Ew', 'amber99sbildn.xml') == 'tip4pew' - assert water_model_selection('TIP4P-Ew', 'amber03.xml') == 'tip4pew' - assert water_model_selection('TIP4P-Ew', 'amber10.xml') == 'tip4pew' - - assert water_model_selection('TIP4P-FB', 'amber99sb.xml') == 'tip4pfb' - assert water_model_selection('TIP4P-FB', 'amber99sbildn.xml') == 'tip4pfb' - assert water_model_selection('TIP4P-FB', 'amber03.xml') == 'tip4pfb' - assert water_model_selection('TIP4P-FB', 'amber10.xml') == 'tip4pfb' - - assert water_model_selection('TIP5P', 'amber99sb.xml') is None - assert water_model_selection('TIP5P', 'amber99sbildn.xml') is None - assert water_model_selection('TIP5P', 'amber03.xml') is None - assert water_model_selection('TIP5P', 'amber10.xml') is None - assert water_model_selection('TIP5P', 'amber14-all.xml') is None # Missing in the initial version - - assert water_model_selection('TIP3P', 'amber14-all.xml') == 'tip3p' - - assert water_model_selection('CHARMM default', 'charmm36.xml') == 'charmm' - assert water_model_selection('TIP3P-PME-B', 'charmm36.xml') == 'charmm' - assert 
water_model_selection('TIP3P-PME-F', 'charmm36.xml') == 'charmm' - assert water_model_selection('SPC/E', 'charmm36.xml') == 'charmm' - assert water_model_selection('TIP4P-Ew', 'charmm36.xml') == 'tip4pew' - assert water_model_selection('TIP4P-2005', 'charmm36.xml') == 'tip4pew' - assert water_model_selection('TIP5P', 'charmm36.xml') == 'tip5p' - assert water_model_selection('TIP5P-Ew', 'charmm36.xml') == 'tip5p' - - assert water_model_selection('TIP3P', 'NonexistentFF') is None + assert water_model_selection("TIP3P", "amber99sb.xml") == "tip3p" + assert water_model_selection("TIP3P", "amber99sbildn.xml") == "tip3p" + assert water_model_selection("TIP3P", "amber03.xml") == "tip3p" + assert water_model_selection("TIP3P", "amber10.xml") == "tip3p" + + assert water_model_selection("SPC/E", "amber99sb.xml") == "spce" + assert water_model_selection("SPC/E", "amber99sbildn.xml") == "spce" + assert water_model_selection("SPC/E", "amber03.xml") == "spce" + assert water_model_selection("SPC/E", "amber10.xml") == "spce" + + assert water_model_selection("TIP4P-Ew", "amber99sb.xml") == "tip4pew" + assert water_model_selection("TIP4P-Ew", "amber99sbildn.xml") == "tip4pew" + assert water_model_selection("TIP4P-Ew", "amber03.xml") == "tip4pew" + assert water_model_selection("TIP4P-Ew", "amber10.xml") == "tip4pew" + + assert water_model_selection("TIP4P-FB", "amber99sb.xml") == "tip4pfb" + assert water_model_selection("TIP4P-FB", "amber99sbildn.xml") == "tip4pfb" + assert water_model_selection("TIP4P-FB", "amber03.xml") == "tip4pfb" + assert water_model_selection("TIP4P-FB", "amber10.xml") == "tip4pfb" + + assert water_model_selection("TIP5P", "amber99sb.xml") is None + assert water_model_selection("TIP5P", "amber99sbildn.xml") is None + assert water_model_selection("TIP5P", "amber03.xml") is None + assert water_model_selection("TIP5P", "amber10.xml") is None + assert ( + water_model_selection("TIP5P", "amber14-all.xml") is None + ) # Missing in the initial version + + assert 
water_model_selection("TIP3P", "amber14-all.xml") == "tip3p" + + assert water_model_selection("CHARMM default", "charmm36.xml") == "charmm" + assert water_model_selection("TIP3P-PME-B", "charmm36.xml") == "charmm" + assert water_model_selection("TIP3P-PME-F", "charmm36.xml") == "charmm" + assert water_model_selection("SPC/E", "charmm36.xml") == "charmm" + assert water_model_selection("TIP4P-Ew", "charmm36.xml") == "tip4pew" + assert water_model_selection("TIP4P-2005", "charmm36.xml") == "tip4pew" + assert water_model_selection("TIP5P", "charmm36.xml") == "tip5p" + assert water_model_selection("TIP5P-Ew", "charmm36.xml") == "tip5p" + + assert water_model_selection("TIP3P", "NonexistentFF") is None + def test_generate_forcefield_with_membrane(sample_rdkit_molecule): - forcefield = generate_forcefield('amber14-all.xml', 'amber14/tip3p.xml', True, sample_rdkit_molecule) + forcefield = generate_forcefield( + "amber14-all.xml", "amber14/tip3p.xml", True, sample_rdkit_molecule + ) assert isinstance(forcefield, app.ForceField) # Add additional assertions specific to the case with a membrane + def test_generate_forcefield_without_membrane(sample_rdkit_molecule): - forcefield = generate_forcefield('amber14-all.xml', 'amber14/tip3p.xml', False, sample_rdkit_molecule) + forcefield = generate_forcefield( + "amber14-all.xml", "amber14/tip3p.xml", False, sample_rdkit_molecule + ) assert isinstance(forcefield, app.ForceField) # Add additional assertions specific to the case without a membrane + def test_generate_forcefield_with_old_amber_forcefield(sample_rdkit_molecule): - forcefield = generate_forcefield('amber99sb.xml', 'amber14/tip3p.xml', True, sample_rdkit_molecule) + forcefield = generate_forcefield( + "amber99sb.xml", "amber14/tip3p.xml", True, sample_rdkit_molecule + ) assert isinstance(forcefield, app.ForceField) # Add additional assertions specific to the case with an old Amber forcefield + def test_generate_forcefield_without_small_molecule(): - forcefield = 
generate_forcefield('amber14-all.xml', 'amber14/tip3p.xml', False) + forcefield = generate_forcefield("amber14-all.xml", "amber14/tip3p.xml", False) assert isinstance(forcefield, app.ForceField) # Add additional assertions specific to the case without a small molecule - def test_generate_forcefield_membrane_logic(sample_rdkit_molecule): - forcefield_1 = generate_forcefield('amber10.xml', 'tip3p.xml', True, sample_rdkit_molecule) - forcefield_2 = generate_forcefield('amber14-all.xml', 'amber14/tip3p.xml', True, sample_rdkit_molecule) - forcefield_3 = generate_forcefield('amber14-all.xml', 'amber14/tip3p.xml', False, sample_rdkit_molecule) - forcefield_4 = generate_forcefield('amber03.xml', 'tip3p.xml', False, sample_rdkit_molecule) + forcefield_1 = generate_forcefield( + "amber10.xml", "tip3p.xml", True, sample_rdkit_molecule + ) + forcefield_2 = generate_forcefield( + "amber14-all.xml", "amber14/tip3p.xml", True, sample_rdkit_molecule + ) + forcefield_3 = generate_forcefield( + "amber14-all.xml", "amber14/tip3p.xml", False, sample_rdkit_molecule + ) + forcefield_4 = generate_forcefield( + "amber03.xml", "tip3p.xml", False, sample_rdkit_molecule + ) assert isinstance(forcefield_1, app.ForceField) assert isinstance(forcefield_2, app.ForceField) @@ -121,31 +176,48 @@ def test_generate_forcefield_membrane_logic(sample_rdkit_molecule): assert isinstance(forcefield_4, app.ForceField) # Additional tests for different force field combinations - forcefield_5 = generate_forcefield('amber14-all.xml', 'tip3p.xml', True, sample_rdkit_molecule) - forcefield_6 = generate_forcefield('amber03.xml', 'amber14/tip3p.xml', False, sample_rdkit_molecule) + forcefield_5 = generate_forcefield( + "amber14-all.xml", "tip3p.xml", True, sample_rdkit_molecule + ) + forcefield_6 = generate_forcefield( + "amber03.xml", "amber14/tip3p.xml", False, sample_rdkit_molecule + ) assert isinstance(forcefield_5, app.ForceField) assert isinstance(forcefield_6, app.ForceField) # Additional tests for 
membrane flag logic - forcefield_7 = generate_forcefield('amber10.xml', 'tip3p.xml', True, sample_rdkit_molecule) - forcefield_8 = generate_forcefield('amber14-all.xml', 'tip3p.xml', False, sample_rdkit_molecule) + forcefield_7 = generate_forcefield( + "amber10.xml", "tip3p.xml", True, sample_rdkit_molecule + ) + forcefield_8 = generate_forcefield( + "amber14-all.xml", "tip3p.xml", False, sample_rdkit_molecule + ) assert isinstance(forcefield_7, app.ForceField) assert isinstance(forcefield_8, app.ForceField) + def test_generate_transitional_forcefield(sample_rdkit_molecule): - transitional_forcefield = generate_transitional_forcefield('amber14-all.xml', 'tip3p.xml', True, sample_rdkit_molecule) + transitional_forcefield = generate_transitional_forcefield( + "amber14-all.xml", "tip3p.xml", True, sample_rdkit_molecule + ) assert isinstance(transitional_forcefield, app.ForceField) # Additional tests for different force field combinations - transitional_forcefield_2 = generate_transitional_forcefield('amber03.xml', 'amber14/tip3p.xml', False, sample_rdkit_molecule) + transitional_forcefield_2 = generate_transitional_forcefield( + "amber03.xml", "amber14/tip3p.xml", False, sample_rdkit_molecule + ) assert isinstance(transitional_forcefield_2, app.ForceField) # Additional tests for membrane flag logic - transitional_forcefield_3 = generate_transitional_forcefield('amber14-all.xml', 'tip3p.xml', False, sample_rdkit_molecule) + transitional_forcefield_3 = generate_transitional_forcefield( + "amber14-all.xml", "tip3p.xml", False, sample_rdkit_molecule + ) assert isinstance(transitional_forcefield_3, app.ForceField) # Additional tests for GAFF registration - transitional_forcefield_4 = generate_transitional_forcefield('amber14-all.xml', 'tip3p.xml', True) + transitional_forcefield_4 = generate_transitional_forcefield( + "amber14-all.xml", "tip3p.xml", True + ) assert isinstance(transitional_forcefield_4, app.ForceField) diff --git 
a/openmmdl/tests/openmmdl_simulation/protein_ligand_prep_test.py b/openmmdl/tests/openmmdl_simulation/protein_ligand_prep_test.py index 00102149..78ae71f5 100644 --- a/openmmdl/tests/openmmdl_simulation/protein_ligand_prep_test.py +++ b/openmmdl/tests/openmmdl_simulation/protein_ligand_prep_test.py @@ -16,31 +16,55 @@ import simtk.openmm.app as app -from simtk.openmm.app import PDBFile, Modeller, PDBReporter, StateDataReporter, DCDReporter, CheckpointReporter +from simtk.openmm.app import ( + PDBFile, + Modeller, + PDBReporter, + StateDataReporter, + DCDReporter, + CheckpointReporter, +) from simtk.openmm import unit, Platform, MonteCarloBarostat, LangevinMiddleIntegrator from simtk.openmm import Vec3 import simtk.openmm as mm -from openmmdl.openmmdl_simulation.scripts.forcefield_water import ff_selection, water_forcefield_selection, water_model_selection, generate_forcefield, generate_transitional_forcefield -from openmmdl.openmmdl_simulation.scripts.protein_ligand_prep import prepare_ligand, rdkit_to_openmm, merge_protein_and_ligand, water_padding_solvent_builder, water_absolute_solvent_builder, membrane_builder, water_conversion -from openmmdl.openmmdl_simulation.scripts.post_md_conversions import mdtraj_conversion, MDanalysis_conversion +from openmmdl.openmmdl_simulation.scripts.forcefield_water import ( + ff_selection, + water_forcefield_selection, + water_model_selection, + generate_forcefield, + generate_transitional_forcefield, +) +from openmmdl.openmmdl_simulation.scripts.protein_ligand_prep import ( + prepare_ligand, + rdkit_to_openmm, + merge_protein_and_ligand, + water_padding_solvent_builder, + water_absolute_solvent_builder, + membrane_builder, + water_conversion, +) +from openmmdl.openmmdl_simulation.scripts.post_md_conversions import ( + mdtraj_conversion, + MDanalysis_conversion, +) protein = "6b73.pdb" -ligand = 'CVV.sdf' +ligand = "CVV.sdf" ligand_name = "UNK" minimization = False sanitization = False -ff = 'AMBER14' -water = 'SPC/E' +ff = 
"AMBER14" +water = "SPC/E" add_membrane = False Water_Box = "Buffer" water_padding_distance = 1.0 -water_boxShape = 'cube' +water_boxShape = "cube" water_ionicstrength = 0.15 -water_positive_ion = 'Na+' -water_negative_ion = 'Cl-' +water_positive_ion = "Na+" +water_negative_ion = "Cl-" water_box_x = 6.873 water_box_y = 7.0 @@ -56,7 +80,6 @@ test_data_directory = Path("openmmdl/tests/data/in") - # Define the full path to the input SDF file TEST_LIGAND_FILE = f"{test_data_directory}/CVV.sdf" TEST_MOL_FILE = f"{test_data_directory}/CVV.mol" @@ -66,15 +89,23 @@ protein_pdb = pdbfixer.PDBFixer(str(TEST_PROTEIN)) -ligand_prepared = prepare_ligand(TEST_LIGAND_FILE,minimize_molecule=minimization) +ligand_prepared = prepare_ligand(TEST_LIGAND_FILE, minimize_molecule=minimization) omm_ligand = rdkit_to_openmm(ligand_prepared, ligand_name) forcefield_selected = ff_selection(ff) -water_selected = water_forcefield_selection(water=water,forcefield_selection=ff_selection(ff)) -model_water = water_model_selection(water=water,forcefield_selection=ff_selection(ff)) -forcefield = generate_forcefield(protein_ff=forcefield_selected, solvent_ff=water_selected, add_membrane=add_membrane, rdkit_mol=ligand_prepared) +water_selected = water_forcefield_selection( + water=water, forcefield_selection=ff_selection(ff) +) +model_water = water_model_selection(water=water, forcefield_selection=ff_selection(ff)) +forcefield = generate_forcefield( + protein_ff=forcefield_selected, + solvent_ff=water_selected, + add_membrane=add_membrane, + rdkit_mol=ligand_prepared, +) complex_topology, complex_positions = merge_protein_and_ligand(protein_pdb, omm_ligand) modeller = app.Modeller(complex_topology, complex_positions) + # Test the prepare_ligand function def test_prepare_ligand(): # Test the function with the sample ligand file. 
@@ -82,33 +113,61 @@ def test_prepare_ligand(): rdkit_mol_mol2_2 = prepare_ligand(TEST_MOL2_FILE, minimize_molecule=True) rdkit_mol_mol = prepare_ligand(TEST_MOL_FILE, minimize_molecule=False) rdkit_mol_mol2 = prepare_ligand(TEST_MOL2_FILE, minimize_molecule=False) - + # Add your assertions here to check if the preparation worked as expected assert rdkit_mol_sdf is not None # Check if the result is not None assert rdkit_mol_mol2_2 is not None # Check if the result is not None assert rdkit_mol_mol is not None # Check if the result is not None assert rdkit_mol_mol2 is not None # Check if the result is not None + def test_rdkit_to_openmm(): omm_ligand = rdkit_to_openmm(ligand_prepared, ligand_name) assert isinstance(omm_ligand, simtk.openmm.app.Modeller) + def test_merge_protein_and_ligand(): - complex_topology, complex_positions = merge_protein_and_ligand(protein_pdb, omm_ligand) + complex_topology, complex_positions = merge_protein_and_ligand( + protein_pdb, omm_ligand + ) assert complex_topology is not None assert complex_positions is not None + def test_water_padding_solvent_builder(): - protein_buffer_solved = water_padding_solvent_builder(model_water, forcefield, water_padding_distance, protein_pdb, modeller, water_positive_ion, water_negative_ion, water_ionicstrength, protein) + protein_buffer_solved = water_padding_solvent_builder( + model_water, + forcefield, + water_padding_distance, + protein_pdb, + modeller, + water_positive_ion, + water_negative_ion, + water_ionicstrength, + protein, + ) assert protein_buffer_solved is not None + def test_water_absolute_solvent_builder(): test_data_directory = Path("openmmdl/tests/data/in") TEST_PROTEIN = f"{test_data_directory}/6b73.pdb" protein_pdb = pdbfixer.PDBFixer(str(TEST_PROTEIN)) - protein_absolute_solved = water_absolute_solvent_builder(model_water, forcefield, water_box_x, water_box_y, water_box_z, protein_pdb, modeller, water_positive_ion, water_negative_ion, water_ionicstrength, protein) + 
protein_absolute_solved = water_absolute_solvent_builder( + model_water, + forcefield, + water_box_x, + water_box_y, + water_box_z, + protein_pdb, + modeller, + water_positive_ion, + water_negative_ion, + water_ionicstrength, + protein, + ) assert protein_absolute_solved is not None -if __name__ == '__main__': +if __name__ == "__main__": pytest.main() diff --git a/openmmdl/tests/openmmdl_simulation/test_post_md_conversions.py b/openmmdl/tests/openmmdl_simulation/test_post_md_conversions.py index dbbc95ca..030f7304 100644 --- a/openmmdl/tests/openmmdl_simulation/test_post_md_conversions.py +++ b/openmmdl/tests/openmmdl_simulation/test_post_md_conversions.py @@ -4,63 +4,73 @@ from pathlib import Path import mdtraj as md -from openmmdl.openmmdl_simulation.scripts.post_md_conversions import mdtraj_conversion, MDanalysis_conversion +from openmmdl.openmmdl_simulation.scripts.post_md_conversions import ( + mdtraj_conversion, + MDanalysis_conversion, +) test_data_directory = Path("openmmdl/tests/data/in") pdb_file = "0_unk_hoh.pdb" dcd_file = "trajectory.dcd" -ligand_name = 'UNK' +ligand_name = "UNK" + def test_mdtraj_conversion(): original_cwd = os.getcwd() os.chdir(test_data_directory) # Create temporary directories to save the output files output_file_dcd = "centered_old_coordinates.dcd" - output_file_xtc = 'centered_old_coordinates.xtc' - output_file_pdb = 'centered_old_coordinates_top.pdb' - output_file_gro = 'centered_old_coordinates_top.gro' - + output_file_xtc = "centered_old_coordinates.xtc" + output_file_pdb = "centered_old_coordinates_top.pdb" + output_file_gro = "centered_old_coordinates_top.gro" + mdtraj_conversion(pdb_file, "gro_xtc") mdtraj_conversion(pdb_file, "pdb_dcd") - + assert output_file_dcd is not None assert output_file_xtc is not None assert output_file_pdb is not None assert output_file_gro is not None os.chdir(original_cwd) + def test_mdanalysis_conversion(): original_cwd = Path(os.getcwd()) test_data_directory = Path("openmmdl/tests/data/in") 
post_mdtraj_pdb_file = test_data_directory / "centered_old_coordinates_top.pdb" post_mdtraj_dcd_file = test_data_directory / "centered_old_coordinates.dcd" - + # Create temporary directories to save the output files all_file_dcd = "centered_traj.dcd" all_file_dcd_unaligned = "centered_traj_unaligned.dcd" - all_file_pdb = 'centered_top.pdb' - prot_lig_file_dcd = 'prot_lig_traj.dcd' - prot_lig_file_dcd_unaligned = 'prot_lig_traj_unaligned.dcd' - prot_lig_file_pdb = 'prot_lig_top.pdb' + all_file_pdb = "centered_top.pdb" + prot_lig_file_dcd = "prot_lig_traj.dcd" + prot_lig_file_dcd_unaligned = "prot_lig_traj_unaligned.dcd" + prot_lig_file_pdb = "prot_lig_top.pdb" all_file_xtc = "centered_traj.xtc" all_file_xtc_unaligned = "centered_traj_unaligned.xtc" all_file_gro = "centered_top.gro" - prot_lig_file_xtc = 'prot_lig_traj.xtc' - prot_lig_file_xtc_unaligned = 'prot_lig_traj_unaligned.xtc' - prot_lig_file_gro = 'prot_lig_top.gro' + prot_lig_file_xtc = "prot_lig_traj.xtc" + prot_lig_file_xtc_unaligned = "prot_lig_traj_unaligned.xtc" + prot_lig_file_gro = "prot_lig_top.gro" - shutil.copy(str(post_mdtraj_pdb_file), '.') - shutil.copy(str(post_mdtraj_dcd_file), '.') + shutil.copy(str(post_mdtraj_pdb_file), ".") + shutil.copy(str(post_mdtraj_dcd_file), ".") post_mdtraj_pdb_file = "centered_old_coordinates_top.pdb" post_mdtraj_dcd_file = "centered_old_coordinates.dcd" ligand_name = "UNK" mda_output = "pdb_dcd_gro_xtc" output_selection = "mda_prot_lig_all" - - #MDanalysis_conversion(pdb_file, dcd_file, ligand_name, "pdb_dcd_gro_xtc", "mda_prot_lig_all") - MDanalysis_conversion(post_mdtraj_pdb_file, post_mdtraj_dcd_file, mda_output, output_selection, ligand_name) + # MDanalysis_conversion(pdb_file, dcd_file, ligand_name, "pdb_dcd_gro_xtc", "mda_prot_lig_all") + MDanalysis_conversion( + post_mdtraj_pdb_file, + post_mdtraj_dcd_file, + mda_output, + output_selection, + ligand_name, + ) assert all_file_dcd is not None assert all_file_dcd_unaligned is not None