-
Notifications
You must be signed in to change notification settings - Fork 4
/
train.py
62 lines (48 loc) · 2.75 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import time
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Disables tensorflow loggings
from options import get_options
from data import create_dataset
from networks import create_model, get_model_options
from argparse import ArgumentParser as AP
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from util.callbacks import LogAndCheckpointEveryNSteps
from human_id import generate_id
def start(cmdline):
    """Configure and launch one training run from parsed command-line arguments.

    Seeds the run, builds the option object with ``get_options``, creates the
    dataset and model from it, and fits the model with a Lightning ``Trainer``.

    Args:
        cmdline: Parsed argument namespace. This function reads ``seed``,
            ``debug``, ``comment``, ``mixed_precision`` and ``gpus`` directly;
            the whole namespace is also handed to ``get_options``.
    """
    pl.trainer.seed_everything(cmdline.seed)
    opt = get_options(cmdline)
    opt.phase = 'train'
    opt.seed = cmdline.seed

    callbacks = []
    logger = None
    if not cmdline.debug:
        # Regular run: log to Weights & Biases and register the periodic
        # log/visualisation/checkpoint callback; outputs go under ./experiments.
        root_dir = './experiments'
        logger = WandbLogger(name=cmdline.comment, save_dir="./experiments", project='fewshot-aw')
        logger.log_hyperparams(opt)
        callbacks.append(LogAndCheckpointEveryNSteps(save_step_frequency=opt.save_latest_freq,
                                                     viz_frequency=opt.display_freq,
                                                     log_frequency=opt.print_freq))
    else:
        # Debug run: no logger, no callbacks; the trainer root is a freshly
        # named directory under /tmp.
        root_dir = os.path.join('/tmp', generate_id())

    dataset = create_dataset(opt)  # create a dataset given opt.dataset_mode and other options
    model = create_model(opt)  # create a model given opt.model and other options

    # 16-bit precision when --mixed_precision is set, full 32-bit otherwise.
    precision = 16 if cmdline.mixed_precision else 32
    # Apex optimisation levels are spelled with the letter "O" ('O1'), not the
    # digit zero; '01' is not a valid level.
    trainer = pl.Trainer(default_root_dir=os.path.join(root_dir, 'checkpoints'), callbacks=callbacks,
                         gpus=cmdline.gpus, logger=logger, precision=precision, amp_level='O1')
    trainer.fit(model, dataset)
if __name__ == '__main__':
    # Command-line interface, declared as (flag, add_argument options) pairs
    # and registered in the order listed.
    cli_spec = (
        ('--id', dict(default=None, type=str, help='Set an existing uuid to resume a training')),
        ('--debug', dict(default=False, action='store_true', help='Disables experiment saving')),
        ('--comment', dict(required=True, help='run identifier')),
        ('--gpus', dict(default=[0], type=int, nargs='+', help='gpus to train on')),
        ('--model', dict(default='comomunit', type=str, help='Choose model for training')),
        ('--dataset', dict(default='anchor', type=str, help='Module name of the dataset importer')),
        ('--learning_rate', dict(default=0.0001, type=float, help='Learning rate')),
        ('--scheduler_policy', dict(default='step', type=str, help='Scheduler policy')),
        ('--decay_iters_step', dict(default=200000, type=int, help='Decay iterations step')),
        ('--decay_step_gamma', dict(default=0.5, type=float, help='Decay step gamma')),
        ('--seed', dict(default=2, type=int, help='Random seed')),
        ('--mixed_precision', dict(default=False, action='store_true',
                                   help='Use mixed precision to reduce memory usage')),
    )

    parser = AP()
    for flag, settings in cli_spec:
        parser.add_argument(flag, **settings)

    # Parse sys.argv and hand the resulting namespace to the training entry point.
    start(parser.parse_args())