When mmagic was first released, I reproduced DreamBooth and did not encounter this problem. But today, when I cloned the latest code and ran the experiment, I hit a dtype-related error. The complete error report is as follows:
/root/anaconda3/envs/mmagic/lib/python3.7/site-packages/diffusers/configuration_utils.py:135: FutureWarning: Accessing config attribute `num_train_timesteps` directly via 'DDPMScheduler' object attribute is deprecated. Please access 'num_train_timesteps' over 'DDPMScheduler's config object instead, e.g. 'scheduler.config.num_train_timesteps'.
deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /root/modelscope/mmagic/tools/train.py:114 in <module> │
│ │
│ 111 │
│ 112 │
│ 113 if __name__ == '__main__': │
│ ❱ 114 │ main() │
│ 115 │
│ │
│ /root/modelscope/mmagic/tools/train.py:107 in main │
│ │
│ 104 │ print_colored_log(f'Log directory: {runner._log_dir}') │
│ 105 │ │
│ 106 │ # start training │
│ ❱ 107 │ runner.train() │
│ 108 │ │
│ 109 │ print_colored_log(f'Log saved under {runner._log_dir}') │
│ 110 │ print_colored_log(f'Checkpoint saved under {cfg.work_dir}') │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/mmengine/runner/runner.py:1721 in train │
│ │
│ 1718 │ │ # This must be called **AFTER** model has been wrapped. │
│ 1719 │ │ self._maybe_compile('train_step') │
│ 1720 │ │ │
│ ❱ 1721 │ │ model = self.train_loop.run() # type: ignore │
│ 1722 │ │ self.call_hook('after_run') │
│ 1723 │ │ return model │
│ 1724 │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/mmengine/runner/loops.py:278 in run │
│ │
│ 275 │ │ │ self.runner.model.train() │
│ 276 │ │ │ │
│ 277 │ │ │ data_batch = next(self.dataloader_iterator) │
│ ❱ 278 │ │ │ self.run_iter(data_batch) │
│ 279 │ │ │ │
│ 280 │ │ │ self._decide_current_val_interval() │
│ 281 │ │ │ if (self.runner.val_loop is not None │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/mmengine/runner/loops.py:302 in run_iter │
│ │
│ 299 │ │ # synchronization during gradient accumulation process. │
│ 300 │ │ # outputs should be a dict of loss. │
│ 301 │ │ outputs = self.runner.model.train_step( │
│ ❱ 302 │ │ │ data_batch, optim_wrapper=self.runner.optim_wrapper) │
│ 303 │ │ │
│ 304 │ │ self.runner.call_hook( │
│ 305 │ │ │ 'after_train_iter', │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/mmengine/model/wrappers/seperate_distrib │
│ uted.py:102 in train_step │
│ │
│ 99 │ │ Returns: │
│ 100 │ │ │ Dict[str, torch.Tensor]: A dict of tensor for logging. │
│ 101 │ │ """ │
│ ❱ 102 │ │ return self.module.train_step(data, optim_wrapper) │
│ 103 │ │
│ 104 │ def val_step(self, data: Union[dict, tuple, list]) -> list: │
│ 105 │ │ """Gets the prediction of module during validation process. │
│ │
│ /root/modelscope/mmagic/mmagic/models/editors/dreambooth/dreambooth.py:293 in train_step │
│ │
│ 290 │ │ │ model_output = self.unet( │
│ 291 │ │ │ │ noisy_latents.float(), │
│ 292 │ │ │ │ timesteps, │
│ ❱ 293 │ │ │ │ encoder_hidden_states=encoder_hidden_states.float()) │
│ 294 │ │ │ model_pred = model_output['sample'] │
│ 295 │ │ │ │
│ 296 │ │ │ loss_dict = dict() │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/nn/modules/module.py:1194 in │
│ _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/nn/parallel/distributed.py:1040 in │
│ forward │
│ │
│ 1037 │ │ │ │ # Notify joined ranks whether they should sync in backwards pass or not. │
│ 1038 │ │ │ │ self._check_global_requires_backward_grad_sync(is_joined_rank=False) │
│ 1039 │ │ │ │
│ ❱ 1040 │ │ │ output = self._run_ddp_forward(*inputs, **kwargs) │
│ 1041 │ │ │ │
│ 1042 │ │ │ # sync params according to location (before/after forward) user │
│ 1043 │ │ │ # specified as part of hook, if hook was specified. │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/nn/parallel/distributed.py:1000 in │
│ _run_ddp_forward │
│ │
│ 997 │ │ │ │ self.use_side_stream_for_tensor_copies │
│ 998 │ │ │ ) │
│ 999 │ │ │ with self._inside_ddp_forward(): │
│ ❱ 1000 │ │ │ │ return module_to_run(*inputs[0], **kwargs[0]) │
│ 1001 │ │ else: │
│ 1002 │ │ │ with self._inside_ddp_forward(): │
│ 1003 │ │ │ │ return module_to_run(*inputs, **kwargs) │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/nn/modules/module.py:1194 in │
│ _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /root/modelscope/mmagic/mmagic/models/archs/wrapper.py:179 in forward │
│ │
│ 176 │ │ Returns: │
│ 177 │ │ │ Any: The output of wrapped module's forward function. │
│ 178 │ │ """ │
│ ❱ 179 │ │ return self.model(*args, **kwargs) │
│ 180 │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/nn/modules/module.py:1194 in │
│ _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/diffusers/models/unet_2d_condition.py:71 │
│ 8 in forward │
│ │
│ 715 │ │ │ encoder_hidden_states = self.encoder_hid_proj(encoder_hidden_states) │
│ 716 │ │ │
│ 717 │ │ # 2. pre-process │
│ ❱ 718 │ │ sample = self.conv_in(sample) │
│ 719 │ │ │
│ 720 │ │ # 3. down │
│ 721 │ │ down_block_res_samples = (sample,) │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/nn/modules/module.py:1194 in │
│ _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/nn/modules/conv.py:463 in forward │
│ │
│ 460 │ │ │ │ │ │ self.padding, self.dilation, self.groups) │
│ 461 │ │
│ 462 │ def forward(self, input: Tensor) -> Tensor: │
│ ❱ 463 │ │ return self._conv_forward(input, self.weight, self.bias) │
│ 464 │
│ 465 class Conv3d(_ConvNd): │
│ 466 │ __doc__ = r"""Applies a 3D convolution over an input signal composed of several inpu │
│ │
│ /root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/nn/modules/conv.py:460 in │
│ _conv_forward │
│ │
│ 457 │ │ │ │ │ │ │ weight, bias, self.stride, │
│ 458 │ │ │ │ │ │ │ _pair(0), self.dilation, self.groups) │
│ 459 │ │ return F.conv2d(input, weight, bias, self.stride, │
│ ❱ 460 │ │ │ │ │ │ self.padding, self.dilation, self.groups) │
│ 461 │ │
│ 462 │ def forward(self, input: Tensor) -> Tensor: │
│ 463 │ │ return self._conv_forward(input, self.weight, self.bias) │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: Input type (float) and bias type (c10::Half) should be the same
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 49302) of binary: /root/anaconda3/envs/mmagic/bin/python
Traceback (most recent call last):
File "/root/anaconda3/envs/mmagic/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/root/anaconda3/envs/mmagic/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/distributed/launch.py", line 195, in <module>
main()
File "/root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/distributed/launch.py", line 191, in main
launch(args)
File "/root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/distributed/launch.py", line 176, in launch
run(args)
File "/root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/distributed/run.py", line 756, in run
)(*cmd_args)
File "/root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/distributed/launcher/api.py", line 132, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/root/anaconda3/envs/mmagic/lib/python3.7/site-packages/torch/distributed/launcher/api.py", line 248, in launch_agent
failures=result.failures,
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
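To narrow this down: judging from the traceback, `train_step` in `dreambooth.py` casts the inputs with `.float()` before calling the UNet, but the UNet's `conv_in` weights and bias appear to still be half precision, hence the float vs. `c10::Half` mismatch. Below is a minimal sketch of that situation outside mmagic (assuming a CUDA device is available; layer sizes are illustrative only):

```python
import torch
import torch.nn as nn

# Minimal sketch of the suspected mismatch. The conv layer stands in for the
# UNet's conv_in with fp16 weights/bias, and the float32 tensor stands in for
# noisy_latents.float() from dreambooth.py.
conv = nn.Conv2d(4, 320, kernel_size=3, padding=1).half().cuda()
latents = torch.randn(2, 4, 64, 64, device='cuda', dtype=torch.float32)

# This should raise a RuntimeError about the input and weight/bias dtypes
# differing (the exact wording may vary with the PyTorch version).
conv(latents)
```

If that is indeed the cause, either keeping the UNet weights in fp32 during training or casting the inputs to the UNet's actual parameter dtype (e.g. `next(self.unet.parameters()).dtype`) instead of hard-coding `.float()` should avoid the mismatch; I am not sure which of the two the latest code intends.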
Reproduces the problem - code sample
ditto
Reproduces the problem - command or script
ditto
Reproduces the problem - error message
ditto
Additional information
No response
Prerequisite
Task
I'm using the official example scripts/configs for the officially supported tasks/models/datasets.
Branch
main branch https://github.com/open-mmlab/mmagic
Environment
ditto