
Merge branch 'main' into dev-memory-profiling-v2
wiederm authored Nov 9, 2024
2 parents f907f83 + 426171a commit 1df3e1f
Showing 9 changed files with 110 additions and 85 deletions.
4 changes: 3 additions & 1 deletion modelforge/dataset/dataset.py
@@ -1022,7 +1022,9 @@ def prepare_data(
             raise FileNotFoundError(
                 f"Dataset statistics file {self.dataset_statistic_filename} not found. Please regenerate the cache."
             )
-        log.info('Processed dataset already exists. Skipping "prepare_data" step.')
+        log.info(
+            f'Processed dataset already exists: {self.cache_processed_dataset_filename}. Skipping "prepare_data" step.'
+        )
         return None

     # if the dataset is not already processed, process it
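For context, the surrounding prepare_data logic is a cache guard: if the processed file already exists, the method logs the hit (now including the filename) and returns early. A minimal sketch of the pattern, assuming loguru-style logging and hypothetical default filenames (the real names are derived from modelforge's configuration):

    import os

    from loguru import logger as log


    class CachedDataset:
        """Hypothetical stand-in for modelforge's dataset class."""

        def __init__(self):
            # illustrative defaults; the real filenames come from the config
            self.cache_processed_dataset_filename = "processed_dataset.pt"
            self.dataset_statistic_filename = "dataset_statistic.toml"

        def prepare_data(self):
            if os.path.exists(self.cache_processed_dataset_filename):
                if not os.path.exists(self.dataset_statistic_filename):
                    raise FileNotFoundError(
                        f"Dataset statistics file {self.dataset_statistic_filename} "
                        "not found. Please regenerate the cache."
                    )
                log.info(
                    f"Processed dataset already exists: "
                    f'{self.cache_processed_dataset_filename}. Skipping "prepare_data" step.'
                )
                return None
            ...  # otherwise, process the raw dataset and write the cache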
25 changes: 14 additions & 11 deletions modelforge/potential/aimnet2.py
@@ -78,6 +78,7 @@ def __init__(
             [
                 AIMNet2InteractionModule(
                     number_of_per_atom_features=number_of_per_atom_features,
+                    number_of_radial_basis_functions=number_of_radial_basis_functions,
                     number_of_vector_features=number_of_vector_features,
                     activation_function=self.activation_function,
                     is_first_module=(i == 0),
@@ -238,6 +239,7 @@ class AIMNet2InteractionModule(nn.Module):
     def __init__(
         self,
         number_of_per_atom_features: int,
+        number_of_radial_basis_functions: int,
         number_of_vector_features: int,
         activation_function: nn.Module,
         is_first_module: bool = False,
@@ -246,12 +248,15 @@ def __init__(
         self.is_first_module = is_first_module
         self.number_of_per_atom_features = number_of_per_atom_features
         self.number_of_vector_features = number_of_vector_features
+        self.gs_to_fatom = Dense(
+            number_of_radial_basis_functions, number_of_per_atom_features, bias=False
+        )

         if not self.is_first_module:
             self.number_of_input_features = (
                 number_of_per_atom_features  # radial_contributions_emb
                 + number_of_vector_features  # vector_contributions_emb
-                + 1  # radial_contributions_charge (from charges)
+                + number_of_per_atom_features  # radial_contributions_charge
                 + number_of_vector_features  # vector_contributions_charge
             )
         else:
@@ -293,32 +298,30 @@ def calculate_radial_contributions(
         gs : Tensor
             Radial symmetry functions with shape (number_of_pairs, G).
         a_j : Tensor
-            Atomic features for each pair with shape (number_of_pairs,
-            F_atom).
+            Atomic features for each pair with shape (number_of_pairs, F_atom) or (number_of_pairs, 1).
         number_of_atoms : int
             Total number of atoms in the system.
         idx_j : Tensor
-            Indices mapping each pair to an atom, with shape
-            (number_of_pairs,).
+            Indices mapping each pair to an atom, with shape (number_of_pairs,).
         Returns
         -------
         Tensor
-            Radial contributions aggregated per atom, with shape
-            (number_of_atoms, F_atom).
+            Radial contributions aggregated per atom, with shape (number_of_atoms, F_atom).
         """
-        # Compute radial contributions
-        avf_s = gs.unsqueeze(-1) * a_j.unsqueeze(1)  # (number_of_pairs, G, F_atom)
+        # Map gs to shape (number_of_pairs, F_atom)
+        mapped_gs = self.gs_to_fatom(gs)  # Shape: (number_of_pairs, F_atom)

-        # Sum over G (if necessary)
-        avf_s = avf_s.sum(dim=1)  # Adjust if needed
+        # Compute avf_s using element-wise multiplication
+        avf_s = a_j * mapped_gs  # Shape: (number_of_pairs, F_atom)

         # Initialize tensor to accumulate radial contributions
         radial_contributions = torch.zeros(
             (number_of_atoms, avf_s.shape[-1]),
             device=avf_s.device,
             dtype=avf_s.dtype,
         )
         # Aggregate per atom
         radial_contributions.index_add_(0, idx_j, avf_s)

         return radial_contributions
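The change above replaces an outer product over all radial basis functions with a learned projection: the old code materialized a (number_of_pairs, G, F_atom) tensor before summing over G, while the new code first maps the G radial features onto F_atom with the bias-free gs_to_fatom layer and then takes an element-wise product, so the large intermediate is never allocated. This is not numerically equivalent to the old sum over G (the projection is learned), which is consistent with the regenerated reference energies in test_aimnet2.py below. A self-contained sketch with illustrative shapes, using nn.Linear as a stand-in for modelforge's Dense:

    import torch
    import torch.nn as nn

    pairs, G, F_atom, n_atoms = 10_000, 64, 64, 500
    gs = torch.randn(pairs, G)        # radial symmetry functions per pair
    a_j = torch.randn(pairs, F_atom)  # atomic features per pair
    idx_j = torch.randint(0, n_atoms, (pairs,))
    gs_to_fatom = nn.Linear(G, F_atom, bias=False)  # stand-in for Dense(..., bias=False)

    # Old path: a (pairs, G, F_atom) intermediate (~164 MB here at float32),
    # reduced with a plain sum over G.
    avf_s_old = (gs.unsqueeze(-1) * a_j.unsqueeze(1)).sum(dim=1)

    # New path: project G -> F_atom first, then multiply element-wise;
    # the largest tensor is only (pairs, F_atom) (~2.6 MB here).
    avf_s_new = a_j * gs_to_fatom(gs)

    # Per-atom aggregation is unchanged: scatter-add over the pair index.
    radial_contributions = torch.zeros(n_atoms, F_atom).index_add_(0, idx_j, avf_s_new)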
2 changes: 2 additions & 0 deletions modelforge/potential/potential.py
@@ -664,6 +664,8 @@ def generate_trainer(
         seed_random_number(potential_seed)

     log.debug(f"{training_parameter=}")
+    log.debug(f"{potential_parameter=}")
+    log.debug(f"{runtime_parameter=}")
     log.debug(f"{dataset_parameter=}")

     trainer = PotentialTrainer(
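The added log lines use the f-string `=` specifier (Python 3.8+), which renders both the expression and its value, so each parameter block is labeled without repeating its name by hand:

    training_parameter = {"lr": 1e-3, "epochs": 10}
    print(f"{training_parameter=}")
    # prints: training_parameter={'lr': 0.001, 'epochs': 10}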
8 changes: 3 additions & 5 deletions modelforge/tests/data/potential_defaults/aimnet2.toml
@@ -2,8 +2,8 @@
 potential_name = "AimNet2"

 [potential.core_parameter]
-number_of_radial_basis_functions = 32
-number_of_vector_features = 5
+number_of_radial_basis_functions = 64
+number_of_vector_features = 8
 maximum_interaction_radius = "5.0 angstrom"
 number_of_interaction_modules = 3
 predicted_properties = ["per_atom_energy"]
@@ -16,13 +16,11 @@ activation_function_name = "GeLU"
 properties_to_featurize = ['atomic_number']
 [potential.core_parameter.featurization.atomic_number]
 maximum_atomic_number = 101
-number_of_per_atom_features = 32
+number_of_per_atom_features = 64

 [potential.postprocessing_parameter]
 properties_to_process = ['per_atom_energy']
 [potential.postprocessing_parameter.per_atom_energy]
 normalize = true
 from_atom_to_system_reduction = true
 keep_per_atom_property = true
 [potential.postprocessing_parameter.general_postprocessing_operation]
 calculate_molecular_self_energy = true
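The feature widths are doubled in lockstep (32 to 64 for both the radial basis functions and the per-atom features); both dimensions now feed the new gs_to_fatom projection, which maps number_of_radial_basis_functions onto number_of_per_atom_features. A quick way to inspect the updated defaults, assuming Python 3.11+'s standard-library tomllib:

    import tomllib  # standard library in Python 3.11+

    with open("modelforge/tests/data/potential_defaults/aimnet2.toml", "rb") as f:
        cfg = tomllib.load(f)

    core = cfg["potential"]["core_parameter"]
    print(core["number_of_radial_basis_functions"])  # 64
    print(core["featurization"]["atomic_number"]["number_of_per_atom_features"])  # 64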
4 changes: 2 additions & 2 deletions modelforge/tests/data/training_defaults/default.toml
@@ -14,7 +14,7 @@ save_dir = "logs"
 # ------------------------------------------------------------ #
 [training.experiment_logger.wandb_configuration]
 save_dir = "logs"
-project = "training_potentials"
+project = "tests"
 group = "exp00"
 log_model = true
 job_type = "testing"
@@ -34,7 +34,7 @@ threshold = 0.1
 threshold_mode = "abs"
 cooldown = 5
 min_lr = 1e-8
-eps = 1e-8 # Optional, default is 1e-8
+eps = 1e-8  # Optional, default is 1e-8
 # ------------------------------------------------------------ #
 [training.loss_parameter]
 loss_components = ['per_system_energy'] #, 'per_atom_force']
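The scheduler keys shown in this excerpt (threshold, threshold_mode, cooldown, min_lr, eps) mirror the arguments of torch.optim.lr_scheduler.ReduceLROnPlateau. A minimal sketch of how such a block might be applied; the model, factor, and patience are illustrative and not part of this excerpt:

    import torch

    model = torch.nn.Linear(4, 1)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="min",            # reduce LR when the monitored loss stops decreasing
        factor=0.1,            # illustrative; not shown in this excerpt
        patience=10,           # illustrative; not shown in this excerpt
        threshold=0.1,
        threshold_mode="abs",
        cooldown=5,
        min_lr=1e-8,
        eps=1e-8,              # optional, default is 1e-8
    )

    for epoch in range(3):
        val_loss = 1.0 / (epoch + 1)  # dummy validation loss
        scheduler.step(val_loss)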
128 changes: 64 additions & 64 deletions modelforge/tests/test_aimnet2.py
@@ -152,70 +152,70 @@ def test_forward(single_batch_with_batchsize, prep_temp_dir):

     ref_per_system_energy = torch.tensor(
         [
-            [-1.6222e00],
-            [-1.7771e-01],
-            [1.5974e-01],
-            [-1.2089e-02],
-            [-1.8864e-01],
-            [-2.7185e-01],
-            [-4.3214e00],
-            [-1.3357e00],
-            [-1.1657e00],
-            [-1.4146e00],
-            [-1.8898e00],
-            [-1.1582e00],
-            [-9.1212e00],
-            [-4.8285e00],
-            [-5.0907e00],
-            [-5.4467e00],
-            [-1.8100e00],
-            [-4.9845e00],
-            [-3.7676e00],
-            [-2.5988e00],
-            [-1.5824e01],
-            [-1.0948e01],
-            [-2.8324e-01],
-            [-4.5179e-01],
-            [-6.8437e-01],
-            [-3.1547e-01],
-            [-5.7387e-01],
-            [-4.6788e-01],
-            [-1.9818e00],
-            [-3.8900e00],
-            [-4.2745e00],
-            [-2.8107e00],
-            [-1.2960e00],
-            [-1.5892e00],
-            [-5.7663e00],
-            [-4.2937e00],
-            [-3.0977e00],
-            [-2.2906e00],
-            [-1.4034e01],
-            [-9.6701e00],
-            [-7.9657e00],
-            [-6.4762e00],
-            [-9.7999e00],
-            [-5.6619e00],
-            [-9.1679e00],
-            [-6.8304e00],
-            [-1.0582e01],
-            [-6.0419e00],
-            [-7.2018e00],
-            [-5.0521e00],
-            [-4.0748e00],
-            [-3.5285e00],
-            [-2.5017e00],
-            [-2.5237e01],
-            [-1.9461e01],
-            [-1.7413e00],
-            [-2.1273e00],
-            [-2.5887e00],
-            [-1.1963e00],
-            [-2.4938e00],
-            [-3.1271e00],
-            [-1.7812e00],
-            [-8.0866e00],
-            [-8.7542e00],
+            [0.2630],
+            [-0.5150],
+            [-0.2999],
+            [-0.0297],
+            [-0.4382],
+            [-0.1805],
+            [0.5974],
+            [0.1769],
+            [0.0842],
+            [-0.2955],
+            [0.1295],
+            [-0.4067],
+            [0.4135],
+            [0.3202],
+            [0.2481],
+            [0.6696],
+            [0.0380],
+            [0.0834],
+            [-0.2613],
+            [-0.8373],
+            [0.2033],
+            [0.1554],
+            [0.0624],
+            [-0.3643],
+            [-0.7861],
+            [-0.0398],
+            [-0.4675],
+            [-0.1000],
+            [0.3265],
+            [0.2546],
+            [-0.1597],
+            [-0.9611],
+            [0.0653],
+            [-0.4411],
+            [0.2587],
+            [-0.1082],
+            [0.0461],
+            [0.0407],
+            [0.6725],
+            [0.3874],
+            [0.3393],
+            [0.1747],
+            [0.4048],
+            [0.1001],
+            [0.1496],
+            [0.2432],
+            [0.3578],
+            [0.2792],
+            [-0.3365],
+            [-0.3329],
+            [-0.8465],
+            [0.0463],
+            [-0.4385],
+            [0.1224],
+            [-0.0442],
+            [0.1029],
+            [-0.4559],
+            [-1.1701],
+            [-0.2714],
+            [0.0318],
+            [-0.8579],
+            [-0.3836],
+            [0.2487],
+            [-0.2728],
         ],
     )

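Because the interaction module now applies a learned projection to the radial basis (see aimnet2.py above), the forward pass produces different energies and the stored reference values are regenerated wholesale. A sketch of the comparison such a test typically makes, using the first few new reference values; the computed tensor and tolerance are illustrative:

    import torch

    ref = torch.tensor([[0.2630], [-0.5150], [-0.2999]])
    computed = torch.tensor([[0.2630], [-0.5150], [-0.2999]])  # stand-in for model output
    assert torch.allclose(computed, ref, atol=1e-4)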
14 changes: 14 additions & 0 deletions modelforge/tests/test_potentials.py
@@ -287,6 +287,20 @@ def test_energy_scaling_and_offset(
     assert torch.allclose(scaled_output["per_system_energy"], compare_to.sum())


+"""
+tensor([[-406.9472],
+        [-397.2831],
+        [-397.2831],
+        [-397.2831],
+        [-397.2831]])
+tensor([[-402.9324],
+        [-401.2677],
+        [-401.2677],
+        [-401.2683],
+        [-401.2684]])
+"""


@pytest.mark.parametrize(
"potential_name", _Implemented_NNPs.get_all_neural_network_names()
)
3 changes: 3 additions & 0 deletions modelforge/train/parameters.py
@@ -386,6 +386,9 @@ def ensure_logger_configuration(self) -> "ExperimentLogger":
     experiment_logger: ExperimentLogger
     verbose: bool = False
     log_norm: bool = False
+    limit_train_batches: Union[float, int, None] = None
+    limit_val_batches: Union[float, int, None] = None
+    limit_test_batches: Union[float, int, None] = None
     optimizer: Type[torch.optim.Optimizer] = torch.optim.AdamW
     min_number_of_epochs: Union[int, None] = None

7 changes: 5 additions & 2 deletions modelforge/train/training.py
@@ -1810,8 +1810,11 @@ def setup_trainer(self) -> Trainer:
             callbacks=self.callbacks,
             benchmark=True,
             inference_mode=False,
-            num_sanity_val_steps=0,
-            gradient_clip_val=10.0,  # FIXME: hardcoded for now
+            limit_train_batches=self.training_parameter.limit_train_batches,
+            limit_val_batches=self.training_parameter.limit_val_batches,
+            limit_test_batches=self.training_parameter.limit_test_batches,
+            num_sanity_val_steps=1,
+            gradient_clip_val=5.0,  # FIXME: hardcoded for now
             log_every_n_steps=self.runtime_parameter.log_every_n_steps,
             enable_model_summary=True,
             enable_progress_bar=self.runtime_parameter.verbose,  # if true will show progress bar
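limit_train_batches, limit_val_batches, and limit_test_batches are standard Lightning Trainer arguments: a float is read as a fraction of each dataloader, an int as an absolute batch count, and None leaves the loader untouched, which is convenient for short memory-profiling runs. A simplified sketch of the new wiring; the values and the Lightning 2.x import path are assumptions:

    from lightning import Trainer  # assumed Lightning 2.x import path

    # e.g. profile on 10% of training batches and 5 validation batches
    trainer = Trainer(
        limit_train_batches=0.1,   # float -> fraction of the train dataloader
        limit_val_batches=5,       # int -> absolute number of batches
        limit_test_batches=None,   # None -> no limit
        num_sanity_val_steps=1,
        gradient_clip_val=5.0,
    )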
