You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
When I set the number of GPUs to a value greater than 1, the following error occurs. How can I fix this bug?
Traceback (most recent call last):
File "/home/23-panjiawei/MRI_code/unet_knee_sc_leaderboard.py", line 195, in <module>
run_cli()
File "/home/23-panjiawei/MRI_code/unet_knee_sc_leaderboard.py", line 190, in run_cli
cli_main(args)
File "/home/23-panjiawei/MRI_code/unet_knee_sc_leaderboard.py", line 67, in cli_main
trainer.fit(model, datamodule=data_module)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 608, in fit
call._call_and_handle_interrupt(
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 36, in _call_and_handle_interrupt
return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py", line 113, in launch
mp.start_processes(
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 2 terminated with the following error:
Traceback (most recent call last):
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py", line 139, in _wrapping_function
results = function(*args, **kwargs)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 650, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1112, in _run
results = self._run_stage()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1191, in _run_stage
self._run_train()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1204, in _run_train
self._run_sanity_check()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1276, in _run_sanity_check
val_loop.run()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py", line 152, in advance
dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py", line 194, in run
self.on_run_start(*args, **kwargs)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py", line 84, in on_run_start
self._data_fetcher = iter(data_fetcher)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/fetching.py", line 178, in __iter__
self.dataloader_iter = iter(self.dataloader)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 444, in __iter__
return self._get_iterator()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 390, in _get_iterator
return _MultiProcessingDataLoaderIter(self)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1077, in __init__
w.start()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
AttributeError: Can't pickle local object 'SliceDataset.__init__.<locals>.<lambda>'
[1] Check for duplicate issues.
[2] Provide a simple example for how to reproduce the bug.
[3] If applicable, include full error messages/tracebacks.
The text was updated successfully, but these errors were encountered:
Hello @King-pand, I haven't been able to reproduce this. It looks like there's an issue with your code in terms of how you set up DistributedDataParallel, perhaps due to a cluster misconfiguration.
Hi @King-pand, I got the same error as you. I found that it occurs because pickle cannot serialize the lambda function assigned in the `__init__` method of `SliceDataset` in mri_data.py. I solved it by replacing `self.raw_sample_filter = lambda raw_sample: True` with a callable class, as follows:
class filter_raw_sample():
    def __call__(self, raw_sample):
        return True
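An instance of this class should be created before the SliceDataset object is built and used in place of the lambda. The class itself must be defined at module level so that pickle can locate it.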
When I set the number of GPUs to a value greater than 1, the following error occurs. How can I fix this bug?
Traceback (most recent call last):
File "/home/23-panjiawei/MRI_code/unet_knee_sc_leaderboard.py", line 195, in <module>
run_cli()
File "/home/23-panjiawei/MRI_code/unet_knee_sc_leaderboard.py", line 190, in run_cli
cli_main(args)
File "/home/23-panjiawei/MRI_code/unet_knee_sc_leaderboard.py", line 67, in cli_main
trainer.fit(model, datamodule=data_module)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 608, in fit
call._call_and_handle_interrupt(
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 36, in _call_and_handle_interrupt
return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py", line 113, in launch
mp.start_processes(
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 2 terminated with the following error:
Traceback (most recent call last):
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py", line 139, in _wrapping_function
results = function(*args, **kwargs)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 650, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1112, in _run
results = self._run_stage()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1191, in _run_stage
self._run_train()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1204, in _run_train
self._run_sanity_check()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1276, in _run_sanity_check
val_loop.run()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py", line 152, in advance
dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py", line 194, in run
self.on_run_start(*args, **kwargs)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py", line 84, in on_run_start
self._data_fetcher = iter(data_fetcher)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/pytorch_lightning/utilities/fetching.py", line 178, in __iter__
self.dataloader_iter = iter(self.dataloader)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 444, in __iter__
return self._get_iterator()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 390, in _get_iterator
return _MultiProcessingDataLoaderIter(self)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1077, in __init__
w.start()
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/home/23-panjiawei/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
AttributeError: Can't pickle local object 'SliceDataset.__init__.<locals>.<lambda>'
The text was updated successfully, but these errors were encountered: