
Merge branch 'main' into dev-memory-profiling-v2
wiederm authored Nov 9, 2024
2 parents f907f83 + 426171a commit 1df3e1f
Showing 9 changed files with 110 additions and 85 deletions.
4 changes: 3 additions & 1 deletion modelforge/dataset/dataset.py
@@ -1022,7 +1022,9 @@ def prepare_data(
             raise FileNotFoundError(
                 f"Dataset statistics file {self.dataset_statistic_filename} not found. Please regenerate the cache."
             )
-        log.info('Processed dataset already exists. Skipping "prepare_data" step.')
+        log.info(
+            f'Processed dataset already exists: {self.cache_processed_dataset_filename}. Skipping "prepare_data" step.'
+        )
         return None

     # if the dataset is not already processed, process it
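For context, the surrounding prepare_data logic is a cache guard: if the processed file already exists, the method logs the hit (now including the filename) and returns early. A minimal sketch of the pattern, assuming loguru-style logging and hypothetical default filenames (the real names are derived from modelforge's configuration):

    import os

    from loguru import logger as log


    class CachedDataset:
        """Hypothetical stand-in for modelforge's dataset class."""

        def __init__(self):
            # illustrative defaults; the real filenames come from the config
            self.cache_processed_dataset_filename = "processed_dataset.pt"
            self.dataset_statistic_filename = "dataset_statistic.toml"

        def prepare_data(self):
            if os.path.exists(self.cache_processed_dataset_filename):
                if not os.path.exists(self.dataset_statistic_filename):
                    raise FileNotFoundError(
                        f"Dataset statistics file {self.dataset_statistic_filename} "
                        "not found. Please regenerate the cache."
                    )
                log.info(
                    f"Processed dataset already exists: "
                    f'{self.cache_processed_dataset_filename}. Skipping "prepare_data" step.'
                )
                return None
            ...  # otherwise, process the raw dataset and write the cache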
25 changes: 14 additions & 11 deletions modelforge/potential/aimnet2.py
@@ -78,6 +78,7 @@ def __init__(
             [
                 AIMNet2InteractionModule(
                     number_of_per_atom_features=number_of_per_atom_features,
+                    number_of_radial_basis_functions=number_of_radial_basis_functions,
                     number_of_vector_features=number_of_vector_features,
                     activation_function=self.activation_function,
                     is_first_module=(i == 0),
@@ -238,6 +239,7 @@ class AIMNet2InteractionModule(nn.Module):
     def __init__(
         self,
         number_of_per_atom_features: int,
+        number_of_radial_basis_functions: int,
         number_of_vector_features: int,
         activation_function: nn.Module,
         is_first_module: bool = False,
@@ -246,12 +248,15 @@ def __init__(
         self.is_first_module = is_first_module
         self.number_of_per_atom_features = number_of_per_atom_features
         self.number_of_vector_features = number_of_vector_features
+        self.gs_to_fatom = Dense(
+            number_of_radial_basis_functions, number_of_per_atom_features, bias=False
+        )

         if not self.is_first_module:
             self.number_of_input_features = (
                 number_of_per_atom_features  # radial_contributions_emb
                 + number_of_vector_features  # vector_contributions_emb
-                + 1  # radial_contributions_charge (from charges)
+                + number_of_per_atom_features  # radial_contributions_charge
                 + number_of_vector_features  # vector_contributions_charge
             )
         else:
@@ -293,32 +298,30 @@ def calculate_radial_contributions(
         gs : Tensor
             Radial symmetry functions with shape (number_of_pairs, G).
         a_j : Tensor
-            Atomic features for each pair with shape (number_of_pairs,
-            F_atom).
+            Atomic features for each pair with shape (number_of_pairs, F_atom) or (number_of_pairs, 1).
         number_of_atoms : int
             Total number of atoms in the system.
         idx_j : Tensor
-            Indices mapping each pair to an atom, with shape
-            (number_of_pairs,).
+            Indices mapping each pair to an atom, with shape (number_of_pairs,).
         Returns
         -------
         Tensor
-            Radial contributions aggregated per atom, with shape
-            (number_of_atoms, F_atom).
+            Radial contributions aggregated per atom, with shape (number_of_atoms, F_atom).
         """
-        # Compute radial contributions
-        avf_s = gs.unsqueeze(-1) * a_j.unsqueeze(1)  # (number_of_pairs, G, F_atom)
+        # Map gs to shape (number_of_pairs, F_atom)
+        mapped_gs = self.gs_to_fatom(gs)  # Shape: (number_of_pairs, F_atom)

-        # Sum over G (if necessary)
-        avf_s = avf_s.sum(dim=1)  # Adjust if needed
+        # Compute avf_s using element-wise multiplication
+        avf_s = a_j * mapped_gs  # Shape: (number_of_pairs, F_atom)

         # Initialize tensor to accumulate radial contributions
         radial_contributions = torch.zeros(
             (number_of_atoms, avf_s.shape[-1]),
             device=avf_s.device,
             dtype=avf_s.dtype,
         )
         # Aggregate per atom
         radial_contributions.index_add_(0, idx_j, avf_s)

         return radial_contributions
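The change above replaces an outer product over all radial basis functions with a learned projection: the old code materialized a (number_of_pairs, G, F_atom) tensor before summing over G, while the new code first maps the G radial features onto F_atom with the bias-free gs_to_fatom layer and then takes an element-wise product, so the large intermediate is never allocated. This is not numerically equivalent to the old sum over G (the projection is learned), which is consistent with the regenerated reference energies in test_aimnet2.py below. A self-contained sketch with illustrative shapes, using nn.Linear as a stand-in for modelforge's Dense:

    import torch
    import torch.nn as nn

    pairs, G, F_atom, n_atoms = 10_000, 64, 64, 500
    gs = torch.randn(pairs, G)        # radial symmetry functions per pair
    a_j = torch.randn(pairs, F_atom)  # atomic features per pair
    idx_j = torch.randint(0, n_atoms, (pairs,))
    gs_to_fatom = nn.Linear(G, F_atom, bias=False)  # stand-in for Dense(..., bias=False)

    # Old path: a (pairs, G, F_atom) intermediate (~164 MB here at float32),
    # reduced with a plain sum over G.
    avf_s_old = (gs.unsqueeze(-1) * a_j.unsqueeze(1)).sum(dim=1)

    # New path: project G -> F_atom first, then multiply element-wise;
    # the largest tensor is only (pairs, F_atom) (~2.6 MB here).
    avf_s_new = a_j * gs_to_fatom(gs)

    # Per-atom aggregation is unchanged: scatter-add over the pair index.
    radial_contributions = torch.zeros(n_atoms, F_atom).index_add_(0, idx_j, avf_s_new)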
2 changes: 2 additions & 0 deletions modelforge/potential/potential.py
@@ -664,6 +664,8 @@ def generate_trainer(
         seed_random_number(potential_seed)

     log.debug(f"{training_parameter=}")
+    log.debug(f"{potential_parameter=}")
+    log.debug(f"{runtime_parameter=}")
     log.debug(f"{dataset_parameter=}")

     trainer = PotentialTrainer(
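The added log lines use the f-string `=` specifier (Python 3.8+), which renders both the expression and its value, so each parameter block is labeled without repeating its name by hand:

    training_parameter = {"lr": 1e-3, "epochs": 10}
    print(f"{training_parameter=}")
    # prints: training_parameter={'lr': 0.001, 'epochs': 10}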
8 changes: 3 additions & 5 deletions modelforge/tests/data/potential_defaults/aimnet2.toml
@@ -2,8 +2,8 @@
 potential_name = "AimNet2"

 [potential.core_parameter]
-number_of_radial_basis_functions = 32
-number_of_vector_features = 5
+number_of_radial_basis_functions = 64
+number_of_vector_features = 8
 maximum_interaction_radius = "5.0 angstrom"
 number_of_interaction_modules = 3
 predicted_properties = ["per_atom_energy"]
@@ -16,13 +16,11 @@ activation_function_name = "GeLU"
 properties_to_featurize = ['atomic_number']
 [potential.core_parameter.featurization.atomic_number]
 maximum_atomic_number = 101
-number_of_per_atom_features = 32
+number_of_per_atom_features = 64

 [potential.postprocessing_parameter]
 properties_to_process = ['per_atom_energy']
 [potential.postprocessing_parameter.per_atom_energy]
 normalize = true
 from_atom_to_system_reduction = true
 keep_per_atom_property = true
 [potential.postprocessing_parameter.general_postprocessing_operation]
 calculate_molecular_self_energy = true
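The feature widths are doubled in lockstep (32 to 64 for both the radial basis functions and the per-atom features); both dimensions now feed the new gs_to_fatom projection, which maps number_of_radial_basis_functions onto number_of_per_atom_features. A quick way to inspect the updated defaults, assuming Python 3.11+'s standard-library tomllib:

    import tomllib  # standard library in Python 3.11+

    with open("modelforge/tests/data/potential_defaults/aimnet2.toml", "rb") as f:
        cfg = tomllib.load(f)

    core = cfg["potential"]["core_parameter"]
    print(core["number_of_radial_basis_functions"])  # 64
    print(core["featurization"]["atomic_number"]["number_of_per_atom_features"])  # 64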
4 changes: 2 additions & 2 deletions modelforge/tests/data/training_defaults/default.toml
@@ -14,7 +14,7 @@ save_dir = "logs"
 # ------------------------------------------------------------ #
 [training.experiment_logger.wandb_configuration]
 save_dir = "logs"
-project = "training_potentials"
+project = "tests"
 group = "exp00"
 log_model = true
 job_type = "testing"
@@ -34,7 +34,7 @@ threshold = 0.1
 threshold_mode = "abs"
 cooldown = 5
 min_lr = 1e-8
-eps = 1e-8 # Optional, default is 1e-8
+eps = 1e-8  # Optional, default is 1e-8
 # ------------------------------------------------------------ #
 [training.loss_parameter]
 loss_components = ['per_system_energy'] #, 'per_atom_force']
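The scheduler keys shown in this excerpt (threshold, threshold_mode, cooldown, min_lr, eps) mirror the arguments of torch.optim.lr_scheduler.ReduceLROnPlateau. A minimal sketch of how such a block might be applied; the model, factor, and patience are illustrative and not part of this excerpt:

    import torch

    model = torch.nn.Linear(4, 1)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="min",            # reduce LR when the monitored loss stops decreasing
        factor=0.1,            # illustrative; not shown in this excerpt
        patience=10,           # illustrative; not shown in this excerpt
        threshold=0.1,
        threshold_mode="abs",
        cooldown=5,
        min_lr=1e-8,
        eps=1e-8,              # optional, default is 1e-8
    )

    for epoch in range(3):
        val_loss = 1.0 / (epoch + 1)  # dummy validation loss
        scheduler.step(val_loss)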
128 changes: 64 additions & 64 deletions modelforge/tests/test_aimnet2.py
@@ -152,70 +152,70 @@ def test_forward(single_batch_with_batchsize, prep_temp_dir):

     ref_per_system_energy = torch.tensor(
         [
-            [-1.6222e00],
-            [-1.7771e-01],
-            [1.5974e-01],
-            [-1.2089e-02],
-            [-1.8864e-01],
-            [-2.7185e-01],
-            [-4.3214e00],
-            [-1.3357e00],
-            [-1.1657e00],
-            [-1.4146e00],
-            [-1.8898e00],
-            [-1.1582e00],
-            [-9.1212e00],
-            [-4.8285e00],
-            [-5.0907e00],
-            [-5.4467e00],
-            [-1.8100e00],
-            [-4.9845e00],
-            [-3.7676e00],
-            [-2.5988e00],
-            [-1.5824e01],
-            [-1.0948e01],
-            [-2.8324e-01],
-            [-4.5179e-01],
-            [-6.8437e-01],
-            [-3.1547e-01],
-            [-5.7387e-01],
-            [-4.6788e-01],
-            [-1.9818e00],
-            [-3.8900e00],
-            [-4.2745e00],
-            [-2.8107e00],
-            [-1.2960e00],
-            [-1.5892e00],
-            [-5.7663e00],
-            [-4.2937e00],
-            [-3.0977e00],
-            [-2.2906e00],
-            [-1.4034e01],
-            [-9.6701e00],
-            [-7.9657e00],
-            [-6.4762e00],
-            [-9.7999e00],
-            [-5.6619e00],
-            [-9.1679e00],
-            [-6.8304e00],
-            [-1.0582e01],
-            [-6.0419e00],
-            [-7.2018e00],
-            [-5.0521e00],
-            [-4.0748e00],
-            [-3.5285e00],
-            [-2.5017e00],
-            [-2.5237e01],
-            [-1.9461e01],
-            [-1.7413e00],
-            [-2.1273e00],
-            [-2.5887e00],
-            [-1.1963e00],
-            [-2.4938e00],
-            [-3.1271e00],
-            [-1.7812e00],
-            [-8.0866e00],
-            [-8.7542e00],
+            [0.2630],
+            [-0.5150],
+            [-0.2999],
+            [-0.0297],
+            [-0.4382],
+            [-0.1805],
+            [0.5974],
+            [0.1769],
+            [0.0842],
+            [-0.2955],
+            [0.1295],
+            [-0.4067],
+            [0.4135],
+            [0.3202],
+            [0.2481],
+            [0.6696],
+            [0.0380],
+            [0.0834],
+            [-0.2613],
+            [-0.8373],
+            [0.2033],
+            [0.1554],
+            [0.0624],
+            [-0.3643],
+            [-0.7861],
+            [-0.0398],
+            [-0.4675],
+            [-0.1000],
+            [0.3265],
+            [0.2546],
+            [-0.1597],
+            [-0.9611],
+            [0.0653],
+            [-0.4411],
+            [0.2587],
+            [-0.1082],
+            [0.0461],
+            [0.0407],
+            [0.6725],
+            [0.3874],
+            [0.3393],
+            [0.1747],
+            [0.4048],
+            [0.1001],
+            [0.1496],
+            [0.2432],
+            [0.3578],
+            [0.2792],
+            [-0.3365],
+            [-0.3329],
+            [-0.8465],
+            [0.0463],
+            [-0.4385],
+            [0.1224],
+            [-0.0442],
+            [0.1029],
+            [-0.4559],
+            [-1.1701],
+            [-0.2714],
+            [0.0318],
+            [-0.8579],
+            [-0.3836],
+            [0.2487],
+            [-0.2728],
         ],
     )

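Because the interaction module now applies a learned projection to the radial basis (see aimnet2.py above), the forward pass produces different energies and the stored reference values are regenerated wholesale. A sketch of the comparison such a test typically makes, using the first few new reference values; the computed tensor and tolerance are illustrative:

    import torch

    ref = torch.tensor([[0.2630], [-0.5150], [-0.2999]])
    computed = torch.tensor([[0.2630], [-0.5150], [-0.2999]])  # stand-in for model output
    assert torch.allclose(computed, ref, atol=1e-4)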
14 changes: 14 additions & 0 deletions modelforge/tests/test_potentials.py
@@ -287,6 +287,20 @@ def test_energy_scaling_and_offset(
     assert torch.allclose(scaled_output["per_system_energy"], compare_to.sum())


+"""
+tensor([[-406.9472],
+        [-397.2831],
+        [-397.2831],
+        [-397.2831],
+        [-397.2831]])
+tensor([[-402.9324],
+        [-401.2677],
+        [-401.2677],
+        [-401.2683],
+        [-401.2684]])
+"""


@pytest.mark.parametrize(
"potential_name", _Implemented_NNPs.get_all_neural_network_names()
)
3 changes: 3 additions & 0 deletions modelforge/train/parameters.py
@@ -386,6 +386,9 @@ def ensure_logger_configuration(self) -> "ExperimentLogger":
     experiment_logger: ExperimentLogger
     verbose: bool = False
     log_norm: bool = False
+    limit_train_batches: Union[float, int, None] = None
+    limit_val_batches: Union[float, int, None] = None
+    limit_test_batches: Union[float, int, None] = None
     optimizer: Type[torch.optim.Optimizer] = torch.optim.AdamW
     min_number_of_epochs: Union[int, None] = None

7 changes: 5 additions & 2 deletions modelforge/train/training.py
@@ -1810,8 +1810,11 @@ def setup_trainer(self) -> Trainer:
             callbacks=self.callbacks,
             benchmark=True,
             inference_mode=False,
-            num_sanity_val_steps=0,
-            gradient_clip_val=10.0,  # FIXME: hardcoded for now
+            limit_train_batches=self.training_parameter.limit_train_batches,
+            limit_val_batches=self.training_parameter.limit_val_batches,
+            limit_test_batches=self.training_parameter.limit_test_batches,
+            num_sanity_val_steps=1,
+            gradient_clip_val=5.0,  # FIXME: hardcoded for now
             log_every_n_steps=self.runtime_parameter.log_every_n_steps,
             enable_model_summary=True,
             enable_progress_bar=self.runtime_parameter.verbose,  # if true will show progress bar
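limit_train_batches, limit_val_batches, and limit_test_batches are standard Lightning Trainer arguments: a float is read as a fraction of each dataloader, an int as an absolute batch count, and None leaves the loader untouched, which is convenient for short memory-profiling runs. A simplified sketch of the new wiring; the values and the Lightning 2.x import path are assumptions:

    from lightning import Trainer  # assumed Lightning 2.x import path

    # e.g. profile on 10% of training batches and 5 validation batches
    trainer = Trainer(
        limit_train_batches=0.1,   # float -> fraction of the train dataloader
        limit_val_batches=5,       # int -> absolute number of batches
        limit_test_batches=None,   # None -> no limit
        num_sanity_val_steps=1,
        gradient_clip_val=5.0,
    )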
