GMPavanLab · CharlyEmpereurmot · Oct 3, 2020 · Sep 15, 2020 · Sep 15, 2020 · Sep 16, 2020
@@ -99,3 +99,6 @@ ENV/
 .pytest_cache/
 
 # data
+/distributions.png
+/G1_DATA/.aa_traj.xtc_offsets.npz
+/G1_DATA/.cg_traj.xtc_offsets.npz
@@ -7,35 +7,35 @@ G1               1
 ; id type resnr residue   atom  cgnr    charge
 
 1      N0     1      G1     A1  1      0.00000
-2      N0     2      G1     A2  2      0.00000
-3     Nda     3      G1     A3  3      0.00000
-4      N0     4      G1     A4  4      0.00000
-5     Nda     5      G1     A5  5      0.00000
-6      Qd     6      G1     A6  6      1.00000
-7     Nda     7      G1     A7  7      0.00000
-8      Qd     8      G1     A8  8      1.00000
-9     Nda     9      G1     A9  9      0.00000
-10     N0    10      G1    A10  10     0.00000
-11    Nda    11      G1    A11  11     0.00000
-12     Qd    12      G1    A12  12     1.00000
-13    Nda    13      G1    A13  13     0.00000
-14     Qd    14      G1    A14  14     1.00000
-15    Nda    15      G1    A15  15     0.00000
-16     N0    16      G1    A16  16     0.00000
-17    Nda    17      G1    A17  17     0.00000
-18     Qd    18      G1    A18  18     1.00000
-19    Nda    19      G1    A19  19     0.00000
-20     Qd    20      G1    A20  20     1.00000
-21    Nda    21      G1    A21  21     0.00000
-22     N0    22      G1    A22  22     0.00000
-23    Nda    23      G1    A23  23     0.00000
-24     Qd    24      G1    A24  24     1.00000
-25    Nda    25      G1    A25  25     0.00000
-26     Qd    26      G1    A26  26     1.00000
+2      N0     1      G1     A2  2      0.00000
+3     Nda     1      G1     A3  3      0.00000
+4      N0     1      G1     A4  4      0.00000
+5     Nda     1      G1     A5  5      0.00000
+6      Qd     1      G1     A6  6      1.00000
+7     Nda     1      G1     A7  7      0.00000
+8      Qd     1      G1     A8  8      1.00000
+9     Nda     1      G1     A9  9      0.00000
+10     N0     1      G1    A10  10     0.00000
+11    Nda     1      G1    A11  11     0.00000
+12     Qd     1      G1    A12  12     1.00000
+13    Nda     1      G1    A13  13     0.00000
+14     Qd     1      G1    A14  14     1.00000
+15    Nda     1      G1    A15  15     0.00000
+16     N0     1      G1    A16  16     0.00000
+17    Nda     1      G1    A17  17     0.00000
+18     Qd     1      G1    A18  18     1.00000
+19    Nda     1      G1    A19  19     0.00000
+20     Qd     1      G1    A20  20     1.00000
+21    Nda     1      G1    A21  21     0.00000
+22     N0     1      G1    A22  22     0.00000
+23    Nda     1      G1    A23  23     0.00000
+24     Qd     1      G1    A24  24     1.00000
+25    Nda     1      G1    A25  25     0.00000
+26     Qd     1      G1    A26  26     1.00000
 
 
 [ bonds ]
-;   i     j   funct   length   force.c.   
+;   i     j   funct   length   force.c.
 
 ; bond group 1
     1     2       1        0         0           ; B1
@@ -72,7 +72,7 @@ G1               1
 
 
 [ angles ]
-;   i     j     k   funct     angle   force.c.   
+;   i     j     k   funct     angle   force.c.
 
 ; angle group 1
     1     2    15       2       120         0           ; A1
@@ -114,3 +114,4 @@ G1               1
    22    23    24       2       180         0           ; A5
    22    25    26       2       180         0           ; A5
 
+
@@ -1,5 +1,5 @@
 g1_GMX.gro created by acpype (Rev: 10101) on Thu Sep 12 17:34:03 2019 t=   0.00000 step= 0
- 1281
+ 1282
     1PG1     N1    1   2.936   2.913   2.508
     1PG1     C2    2   2.896   2.600   2.705
     1PG1     H7    3   3.134   3.247   2.318
@@ -26,6 +26,7 @@ g1_GMX.gro created by acpype (Rev: 10101) on Thu Sep 12 17:34:03 2019 t=   0.000
     1PG1     H9   24   2.384   3.105   3.680
     1PG1    H11   25   3.353   3.308   3.377
     1PG1     N7   26   3.705   3.490   3.432
+    1PG1     N7   26   3.705   3.490   3.432
     2W        W   27   0.084   3.595   3.359
     3W        W   28   0.460   2.488   2.882
     4W        W   29   3.165   2.218   0.652

@@ -10,5 +10,5 @@ G1 (PAMAM) in water
 [ molecules ]
 ; Compound        #mols
 G1                1
-W                 1247
-CL-               8
+W                 1248
+CL-               8
@@ -1 +1 @@
-__version__ = 'v1.1.2'
+__version__ = '1.1.3'
@@ -662,3 +662,5 @@ def main():
 	run(ns)
 
 
+if __name__ == "__main__":
+	main()
@@ -2,23 +2,15 @@
 github_url = 'http://github.com/GMPavanLab/SwarmCG'
 gmx_path = 'gmx'
 
-# clustering, defaults
-default_dist_thres_bonds = 1 # nm
-default_dist_thres_angles = 180 # degrees
-default_dist_thres_dihedrals = 360 # degrees
-# default_dist_thres_bonds = 0.01 # for tests, force splitting groups with distribution clustering
-# default_dist_thres_angles = 1 # for tests, force splitting groups with distribution clustering
-# default_dist_thres_dihedrals = 1 # for tests, force splitting groups with distribution clustering
-
 # BI and FST-PSO OPTI, defaults
 kB = 0.008314462
-sim_temperature = 300 # Kelvin
-bi_nb_bins = 50 # nb of bins to use for Boltzmann Inversion, will be doubled for dihedrals distributions binning during BI -- this has huge impact on the results of the BI and this value shall STAY AT 50 ! actually I did not try to modify much but this feels like dangerous atm
-bonds_max_range = 5 # nm -- used to define grid for EMD calculations so increasing this only slightly increases computation time, however small bw for bonds has real impact
-bw_constraints = 0.002 # nm
-bw_bonds = 0.01 # nm
-bw_angles = 2.5 # degrees
-bw_dihedrals = 2.5 # degrees
+sim_temperature = 300  # Kelvin
+bi_nb_bins = 50  # nb of bins to use for Boltzmann Inversion, will be doubled for dihedrals distributions binning during BI -- this has huge impact on the results of the BI and this value shall STAY AT 50 ! actually I did not try to modify much but this feels like dangerous atm
+bonds_max_range = 20  # nm -- used to define grid for EMD calculations so increasing this only slightly increases computation time, however small bw for bonds has real impact
+bw_constraints = 0.002  # nm
+bw_bonds = 0.01  # nm
+bw_angles = 2.5  # degrees
+bw_dihedrals = 2.5  # degrees
 default_min_fct_bonds = 0
 default_max_fct_bonds_bi = 17000
 default_max_fct_bonds_opti = 18000
@@ -38,42 +30,43 @@
 default_abs_range_fct_dihedrals_bi_func_with_mult = 3.5
 default_abs_range_fct_dihedrals_opti_func_with_mult = 15
 
-bonds2angles_scoring_factor = 500 # multiplier applied to constraints/bonds EMD scores to retrieve angles/dihedrals mismatches that are comparable, for the opti scoring function
-sim_crash_EMD_indep_score = 150 # when a simulation crashes or does not finish for any reason: EMD distance between 2 distributions, for 1 geom
+bonds2angles_scoring_factor = 500  # multiplier applied to constraints/bonds EMD scores to retrieve angles/dihedrals mismatches that are comparable, for the opti scoring function
+sim_crash_EMD_indep_score = 150  # when a simulation crashes or does not finish for any reason: EMD distance between 2 distributions, for 1 geom
 
 # bonds scaling, default
-bonds_scaling = 1.0 # ratio
-min_bonds_length = 0.00 # nm
-bonds_scaling_str = '' # constraints and bonds ids + their required target AA-mapped distributions rescaled averages
+bonds_scaling = 1.0  # ratio
+min_bonds_length = 0.00  # nm
+bonds_scaling_str = ''  # constraints and bonds ids + their required target AA-mapped distributions rescaled averages
 
 # building of the initial guesses for optimization, defaults
-bond_dist_guess_variation = 0.025 # nm
-angle_value_guess_variation = 10 # degrees
-dihedral_value_guess_variation = 10 # degrees
-# val_guess_fact = 1.0 # factor to apply to initial geoms values to find low and high boundaries for random generation of particles' values -- now adjusted according to optimization cycles
-# fct_guess_fact = 0.2 # factor to apply to initial force constant to find low and high boundaries for random generation of particles' force constants -- now adjusted according to optimization cycles
-fct_guess_min_flat_diff_bonds = 200 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants
-fct_guess_min_flat_diff_angles = 50 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants
-fct_guess_min_flat_diff_dihedrals_without_mult = 0.50 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants
-fct_guess_min_flat_diff_dihedrals_with_mult = 0.20 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants
+bond_dist_guess_variation = 0.025  # nm
+angle_value_guess_variation = 10  # degrees
+dihedral_value_guess_variation = 10  # degrees
+fct_guess_min_flat_diff_bonds = 200  # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants
+fct_guess_min_flat_diff_angles = 50  # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants
+fct_guess_min_flat_diff_dihedrals_without_mult = 0.50  # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants
+fct_guess_min_flat_diff_dihedrals_with_mult = 0.20  # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants
 
 # gromacs functions that are properly treated at the moment
 # if we find a function that is not handled, program will exit with an appropriate error message
-# TODO: handle dihedral function 9 correctly so that different potentials can be stacked
-# TODO: for the same beads -- this is the primary purpose of function 9 !!
+# TODO: handle dihedral function 9 correctly so that different potentials can be stacked for the same beads
+#       this is the primary purpose of function 9 !!
 handled_functions = {
-    'contraint': [1],
-    'bond': [1],
-    'angle': [1, 2],
-    'dihedral': [1, 2, 4],
-    'dihedral_with_mult': [1, 4]
-    # these functions use 3 parameters, the last one being multiplicity (if it's omitted gromacs will use 1 by default, we reproduce this behavior)
+    'constraint': [1],  # tested and verified: 1
+    'bond': [1],  # tested and verified: 1
+    'angle': [1, 2],  # tested and verified: 1, 2
+    'dihedral': [1, 2, 4],  # tested and verified: 1, 2, 4 -- ongoing: 9 (need to merge the 1+ dihedrals groups on plots)
+    'virtual_sites2': [1],  # tested and verified: 1 -- ongoing: 2 (need GMX 2020)
+    'virtual_sites3': [1, 2],  # tested and verified: 1, 2 -- ongoing: 3, 4
+    'virtual_sites4': [],  # ongoing: 2 -- irrelevant: 1
+    'virtual_sitesn': [1, 2, 3]  # tested and verified: 1, 2, 3
 }
+dihedral_func_with_mult = [1, 4, 9]  # these functions use 3 parameters, the last one being multiplicity
 
 # plots display parameters
-use_hists = False # hists are not implemented in a way that they will be displayed with left and right borders, as it is already the case for bonds
-line_alpha = 0.6 # line alpha for the density plots
-fill_alpha = 0.35 # fill alpha for the density plots
+use_hists = False  # hists are not implemented in a way that they will be displayed with left and right bold borders atm
+line_alpha = 0.6  # line alpha for the density plots
+fill_alpha = 0.35  # fill alpha for the density plots
 cg_color = '#1f77b4'
 atom_color = '#d62728'
 
@@ -108,7 +101,7 @@
 
 # optimization output filenames
 input_sim_files_dirname = '.internal/input_CG_simulation_files'
-iteration_sim_files_dirname = 'CG_sim_files' # basename to be appended to with _NN
+iteration_sim_files_dirname = 'CG_sim_files'  # basename to be appended to with _NN
 best_fitted_model_dirname = 'optimized_CG_model'
 distrib_plots_all_evals_dirname = 'all_evals_distributions'
 log_files_all_evals_dirname = 'all_evals_logs'

@@ -21,38 +21,44 @@
 def run(ns):
 
 	from numpy import VisibleDeprecationWarning
-	warnings.filterwarnings("ignore", category=VisibleDeprecationWarning) # filter MDAnalysis + numpy deprecation stuff that is annoying
+	warnings.filterwarnings("ignore", category=VisibleDeprecationWarning)  # filter MDAnalysis + numpy deprecation stuff that is annoying
 
-	# TODO: make it possible to feed a delta for Rg in case the model has scaling ?
+	# TODO: make it possible to feed a delta/offset for Rg in case the model has bonds scaling ?
 
-	ns.molname_in = None # TODO: arguments that exist only in the scope of optimization (useless for manual model evaluation) -- but this could be modified to be allowed to evaluate models in mixed membranes, averaging distribs for given molecule name only
+	# get basenames for simulation files
+	ns.cg_itp_basename = os.path.basename(ns.cg_itp_filename)
+
+	# NOTE: some arguments exist only in the scope of optimization (optimize_model.py) or only in the scope of model
+	#       evaluation (evaluate_mode.py), so they need to be defined here
+	ns.molname_in = None
 	ns.gyr_aa_mapped, ns.gyr_aa_mapped_std = None, None
-	# ns.sasa_aa_mapped, ns.sasa_aa_mapped_std = None, None
-	ns.aa_rg_offset = 0
+	ns.sasa_aa_mapped, ns.sasa_aa_mapped_std = None, None
+	ns.aa_rg_offset = 0  # TODO: allow an argument more in evaluate_model, like in optimiwe_model, for adding an offset to Rg
 
 	scg.set_MDA_backend(ns)
 
-	# TODO: add missing checks -- if some are missing
-	# TODO: factorize all checks and put them in global lib
 	if not os.path.isfile(ns.aa_tpr_filename):
 		msg = (
-			"Cannot find coordinate file of the atomistic simulation"
-			"(GRO, PDB, or other trajectory formats supported by MDAnalysis)"
+			f"Cannot find topology file of the atomistic simulation at location: {ns.aa_tpr_filename}\n"
+			f"(TPR or other portable topology formats supported by MDAnalysis)"
 		)
 		raise exceptions.MissingCoordinateFile(msg)
 	if not os.path.isfile(ns.aa_traj_filename):
 		msg = (
-			"Cannot find trajectory file of the atomistic simulation"
-			"(XTC, TRR, or other trajectory formats supported by MDAnalysis)"
+			f"Cannot find trajectory file of the atomistic simulation at location: {ns.aa_traj_filename}\n"
+			f"(XTC, TRR, or other trajectory formats supported by MDAnalysis)"
 		)
 		raise exceptions.MissingTrajectoryFile(msg)
 
 	if not os.path.isfile(ns.cg_map_filename):
-		msg = "Cannot find CG beads mapping file (NDX-like file format)"
+		msg = (
+			f"Cannot find CG beads mapping file at location: {ns.cg_map_filename}\n"
+			f"(NDX-like file format)"
+		)
 		raise exceptions.MissingIndexFile(msg)
 
 	if not os.path.isfile(ns.cg_itp_filename):
-		msg = "Cannot find ITP file of the CG model"
+		msg = f"Cannot find ITP file of the CG model at location: {ns.cg_itp_filename}"
 		raise exceptions.MissingItpFile(msg)
 
 	# check bonds scaling arguments conflicts
@@ -71,7 +77,7 @@ def run(ns):
 
 	# display parameters for function compare_models
 	if not os.path.isfile(ns.cg_tpr_filename) or not os.path.isfile(ns.cg_traj_filename):
-		# switch to atomistic mapping inspection exclusively (= do NOT use real CG distributions)
+		# switch to atomistic mapping inspection exclusively (= do NOT plot the CG distributions)
 		print('Could not find file(s) for either CG topology or trajectory')
 		print('  Going for inspection of AA-mapped distributions exclusively')
 		print()
@@ -85,7 +91,26 @@ def run(ns):
 	except IndexError as e:
 		ns.plot_filename = ns.plot_filename+'.png'
 
-	scg.create_bins_and_dist_matrices(ns)
+	scg.create_bins_and_dist_matrices(ns)  # bins for EMD calculations
+	scg.read_ndx_atoms2beads(ns)  # read mapping, get atoms accurences in beads
+	scg.get_atoms_weights_in_beads(ns)  # get weights of atoms within beads
+
+	scg.read_cg_itp_file(ns)  # load the ITP object and find out geoms grouping
+	scg.process_scaling_str(ns)  # process the bonds scaling specified by user
+
+	print()
+	scg.read_aa_traj(ns)  # create universe and read traj
+	scg.load_aa_data(ns)  # read atoms attributes
+	scg.make_aa_traj_whole_for_selected_mols(ns)
+
+	# for each CG bead, create atom groups for trajectory geoms calculation using mass and atom weights across beads
+	scg.get_beads_MDA_atomgroups(ns)
+
+	print('\nMapping the trajectory from AA to CG representation')
+	scg.initialize_cg_traj(ns)
+	scg.map_aa2cg_traj(ns)
+	print()
+
 	scg.compare_models(ns, manual_mode=True, calc_sasa=False)
 
 
@@ -128,7 +153,6 @@ def main():
 							   help='XTC file of your CG trajectory (omit for solo AA inspection)',
 							   type=str, default=config.metavar_cg_traj,
 							   metavar='     ' + scg.par_wrap(config.metavar_cg_traj))
-	# required_args.add_argument('-figmolname', dest='figmolname', help='TODO REMOVE', type=str, required=True) # TODO: remove, this was just for figures
 
 	optional_args = args_parser.add_argument_group(bullet + 'CG MODEL SCALING')
 	# optional_args.add_argument('-nb_threads', dest='nb_threads', help='number of threads to use', type=int, default=1, metavar='1') # TODO: does NOT work properly -- modif MDAnalysis code with OpenMP num_threads(n) in the pragma
@@ -147,8 +171,6 @@ def main():
 							   help=config.help_bonds2angles_scoring_factor, type=float,
 							   default=config.bonds2angles_scoring_factor,
 							   metavar='      ' + scg.par_wrap(config.bonds2angles_scoring_factor))
-	# ONLY FOR PAPER FIGURES
-	# optional_args.add_argument('-datamol', dest='datamol', help='Save bonded score and Rg values for each frame across simulation', type=str, default='MOL_EXEC_MODE')
 
 	graphical_args = args_parser.add_argument_group(bullet + 'FIGURE DISPLAY')
 	graphical_args.add_argument('-mismatch_ordering', dest='mismatch_order',
@@ -191,14 +213,17 @@ def main():
 								action='store_true', default=False)
 
 	# display help if script was called without arguments
-	if len(sys.argv) == 1:
-		args_parser.print_help()
-		sys.exit()
+	# if len(sys.argv) == 1:
+	# 	args_parser.print_help()
+	# 	sys.exit()
 
 	# arguments handling, display command line if help or no arguments provided
 	ns = args_parser.parse_args()
 	input_cmdline = ' '.join(map(cmd_quote, sys.argv))
 	print('Working directory:', os.getcwd())
 	print('Command line:', input_cmdline)
 
-	run(ns)
+	run(ns)
+
+if __name__ == "__main__":
+	main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -10,5 +10,5 @@ G1 (PAMAM) in water @@
     [ molecules ]
     ; Compound        #mols
     G1                1
-    W                 1247
-    CL-               8
+    W                 1248
+    CL-               8
Original file line number	Diff line number	Diff line change
Expand Up		@@ -662,3 +662,5 @@ def main():
		run(ns)


		if __name__ == "__main__":
		main()