Describe the bug

When I set the context parallel size > 2 for long-context training, I hit what looks like a flash attention failure (an illegal memory access inside TransformerEngine's attention path). I am running inside the NeMo Docker image.
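For reference, one thing I checked before filing this: if I understand TransformerEngine's context-parallel attention correctly, it slices the sequence into 2 * cp_size chunks for load-balanced causal masking, so the sequence length has to divide evenly. A minimal sketch of that sanity check (seq_length and cp_size here are illustrative, not my exact values, and the 2 * cp_size divisibility rule is my assumption about TE's requirement):

    # Sanity check on long-context + context-parallel settings.
    # Assumption: TE's causal context parallelism needs seq_length
    # divisible by 2 * context_parallel_size (load-balanced chunking).
    seq_length = 32768  # illustrative long-context value
    cp_size = 4         # illustrative context_parallel_size > 2
    assert seq_length % (2 * cp_size) == 0, (
        "seq_length must be divisible by 2 * context_parallel_size"
    )

The first failure occurs inside TransformerEngine's attention path: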
hidden_states = self.decoder(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/megatron-lm/megatron/core/transformer/transformer_block.py", line 383, in forward
hidden_states, context = layer(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/megatron-lm/megatron/core/transformer/transformer_layer.py", line 178, in forward
attention_output_with_bias = self.self_attention(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/megatron-lm/megatron/core/transformer/attention.py", line 315, in forward
core_attn_out = self.core_attention(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/megatron-lm/megatron/core/transformer/custom_layers/transformer_engine.py", line 514, in forward
core_attn_out = super().forward(
File "/usr/local/lib/python3.10/dist-packages/transformer_engine/pytorch/attention.py", line 3599, in forward
qkv_layout, query_layer, key_layer, value_layer = _get_qkv_layout(
File "/usr/local/lib/python3.10/dist-packages/transformer_engine/pytorch/attention.py", line 1887, in _get_qkv_layout
q, k, v = [x.contiguous() for x in [q, k, v]]
File "/usr/local/lib/python3.10/dist-packages/transformer_engine/pytorch/attention.py", line 1887, in
q, k, v = [x.contiguous() for x in [q, k, v]]
RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/root/code/NeMO_megatron/NeMo/examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py", line 79, in main
trainer.fit(model)
File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 544, in fit
call._call_and_handle_interrupt(
File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 68, in _call_and_handle_interrupt
trainer._teardown()
File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 1010, in _teardown
self.strategy.teardown()
File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/ddp.py", line 419, in teardown
super().teardown()
File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/parallel.py", line 133, in teardown
super().teardown()
File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/strategy.py", line 537, in teardown
self.lightning_module.cpu()
File "/usr/local/lib/python3.10/dist-packages/lightning_fabric/utilities/device_dtype_mixin.py", line 82, in cpu
return super().cpu()
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 960, in cpu
return self._apply(lambda t: t.cpu())
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 802, in _apply
module._apply(fn)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 802, in _apply
module._apply(fn)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 802, in _apply
module._apply(fn)
[Previous line repeated 1 more time]
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 825, in _apply
param_applied = fn(param)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 960, in
return self._apply(lambda t: t.cpu())
RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
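Per the hint in the error message, my next step is to re-run with synchronous kernel launches so the reported stack points at the kernel that actually faulted. A minimal sketch of that (the script is the same megatron_gpt_finetuning.py from the traceback above):

    # Force synchronous CUDA launches for an accurate faulting stack.
    # Must take effect before the first CUDA call in the process.
    import os
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

Equivalently, the variable can be exported in the shell before launching the training script.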