Skip to content

Commit

Permalink
Fix error for new trainings in LoRA
Browse files Browse the repository at this point in the history
  • Loading branch information
hlinander committed Jan 6, 2024
1 parent 579c812 commit f982128
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions lib/distributed_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,15 @@ def distributed_train(requested_configs: List[TrainRun] = None):
if distributed_train_run is not None:
try:
last_aquired_training = time.time()
if (
get_serialization_epoch(
DeserializeConfig(
train_run=distributed_train_run.train_run,
device_id=device_id,
)
serialized_epoch = get_serialization_epoch(
DeserializeConfig(
train_run=distributed_train_run.train_run,
device_id=device_id,
)
< distributed_train_run.train_run.epochs
)
if (
serialized_epoch is None
or serialized_epoch < distributed_train_run.train_run.epochs
):
do_train_run(distributed_train_run, device_id)
else:
Expand Down

0 comments on commit f982128

Please sign in to comment.