forked from kylesargent/ZeroNVS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
zeronvs_config.yaml
155 lines (155 loc) · 3.51 KB
/
zeronvs_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Model section: `target` is a dotted class path and `params` holds the
# settings listed for it. The same target/params pairing repeats for each
# nested *_config entry.
# NOTE(review): the nesting here was rebuilt from a copy whose indentation had
# been stripped (all keys at column 0, so `target`/`params` collided as
# duplicate keys). Keys and values are unchanged; confirm the hierarchy
# against the upstream file.
model:
  base_learning_rate: 0.0001
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.012
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image_target
    cond_stage_key: image_cond
    image_size: 32
    channels: 4
    cond_stage_trainable: false
    conditioning_key: hybrid
    monitor: val/loss_simple_ema
    scale_factor: 0.18215

    # No `target` is given for this entry; only `params`.
    conditioning_config:
      params:
        mode: 7dof_quantile_scale
        embedding_dim: 19
        depth_model_name: midas

    scheduler_config:
      target: ldm.lr_scheduler.LambdaLinearScheduler
      params:
        # Each setting is a one-element list.
        warm_up_steps:
          - 100
        cycle_lengths:
          - 10000000000000
        f_start:
          - 1.0e-06
        f_max:
          - 1.0
        f_min:
          - 1.0

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32
        in_channels: 8
        out_channels: 4
        model_channels: 320
        attention_resolutions:
          - 4
          - 2
          - 1
        num_res_blocks: 2
        channel_mult:
          - 1
          - 2
          - 4
          - 4
        num_heads: 8
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: true
        legacy: false

    # No `target` is given for this entry; only `params`.
    eval_config:
      params:
        scale: 3.0
        ddim_steps: 100
        ddim_eta: 1.0
        lpips_model_path: null

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 4
            - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenCLIPImageEmbedder
# Data section: `target` is a dotted class path and `params` holds separate
# training and validation settings.
# NOTE(review): the nesting here was rebuilt from a copy whose indentation had
# been stripped. Keys and values are unchanged. Placing the dataset_config_*
# entries under train_config is an assumption based on key order; confirm
# against the upstream file.
data:
  target: ldm.data.simple.WDSGenericDataModule
  params:
    train_config:
      batch_size: 48
      num_workers: 6
      shuffle_buffer_size: 500
      prefetch_factor: 4

      # Three dataset entries; their `probability` values (0.34 + 0.33 + 0.33)
      # sum to 1.0.
      dataset_config_1:
        dataset_n_shards: 127
        dataset_name: co3d
        views_per_scene: 100
        dataset_n_scenes: 18432
        rate: 0.025
        probability: 0.34
        dataset_url: null
      dataset_config_2:
        dataset_n_shards: 127
        dataset_name: re10k
        views_per_scene: 200
        dataset_n_scenes: 65280
        probability: 0.33
        rate: 0.025
        dataset_url: null
      dataset_config_3:
        dataset_n_shards: 127
        dataset_name: acid
        views_per_scene: 100
        dataset_n_scenes: 12032
        probability: 0.33
        rate: 0.025
        dataset_url: null

    # The validation entry lists its dataset fields inline, not in a
    # dataset_config_* sub-mapping.
    val_config:
      batch_size: 4
      subsample: 1.0
      scene_scale: 1.0
      dataset_n_shards: 1
      dataset_name: co3d
      dataset_n_scenes: 150
      num_workers: 1
      shuffle_buffer_size: 20
      rate: 0.1
      dataset_url: null
# `--lightning` section (the key keeps its literal leading dashes).
# NOTE(review): the nesting was rebuilt from a copy whose indentation had been
# stripped. `modelcheckpoint` is assumed to be a sibling of `trainer`, not a
# child of it; confirm against the upstream file.
--lightning:
  trainer:
    accumulate_grad_batches: 4
  modelcheckpoint:
    params:
      every_n_train_steps: 2500
# `--data` section (the key keeps its literal leading dashes). It carries only
# the train and validation batch sizes.
# NOTE(review): the nesting was rebuilt from a copy whose indentation had been
# stripped; confirm against the upstream file.
--data:
  params:
    train_config:
      batch_size: 48
    val_config:
      batch_size: 4
# `--model` section (the key keeps its literal leading dashes).
# NOTE(review): the nesting was rebuilt from a copy whose indentation had been
# stripped. `base_learning_rate` is assumed to sit beside `params`, not inside
# it, because it follows the last nested entry; confirm against the upstream
# file.
--model:
  params:
    conditioning_config:
      params:
        mode: 7dof_quantile_scale
        embedding_dim: 19
    eval_config:
      params:
        ddim_steps: 100
  base_learning_rate: 0.0001
# `--args` section (the key keeps its literal leading dashes).
# `finetune_from` is explicitly null here.
--args:
  finetune_from: null