You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
cuBLAS API failed with status 15
A: torch.Size([24, 4096]), B: torch.Size([4096, 4096]), C: (24, 4096); (lda, ldb, ldc): (c_int(768), c_int(131072), c_int(768)); (m, n, k): (c_int(24), c_int(4096), c_int(4096))
Traceback (most recent call last):
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/gradio/routes.py", line 384, in run_predict
output = await app.get_blocks().process_api(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/gradio/blocks.py", line 1020, in process_api
result = await self.call_function(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/gradio/blocks.py", line 844, in call_function
prediction = await anyio.to_thread.run_sync(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/anyio/to_thread.py", line 31, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
return await future
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 867, in run
result = context.run(func, *args)
File "/home/touhi/Desktop/llm/Alpaca-LoRA-Serve/app.py", line 58, in chat_batch
bot_responses = get_output_batch(
File "/home/touhi/Desktop/llm/Alpaca-LoRA-Serve/gen.py", line 22, in get_output_batch
generated_id = model.generate(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/peft/peft_model.py", line 581, in generate
outputs = self.base_model.generate(**kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/generation/utils.py", line 1405, in generate
return self.greedy_search(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/generation/utils.py", line 2200, in greedy_search
outputs = self(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 765, in forward
outputs = self.model(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 614, in forward
layer_outputs = decoder_layer(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 309, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 209, in forward
query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/peft/tuners/lora.py", line 522, in forward
result = super().forward(x)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/bitsandbytes/nn/modules.py", line 242, in forward
out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/bitsandbytes/autograd/_functions.py", line 488, in matmul
return MatMul8bitLt.apply(A, B, out, bias, state)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/bitsandbytes/autograd/_functions.py", line 377, in forward
out32, Sout32 = F.igemmlt(C32A, state.CxB, SA, state.SB)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/bitsandbytes/functional.py", line 1410, in igemmlt
raise Exception('cublasLt ran into an error!')
Exception: cublasLt ran into an error!
error detected
The text was updated successfully, but these errors were encountered:
I am running on Ubuntu 22.04 with an 8GB GPU
The error is as follows when I give the prompt:
cuBLAS API failed with status 15
A: torch.Size([24, 4096]), B: torch.Size([4096, 4096]), C: (24, 4096); (lda, ldb, ldc): (c_int(768), c_int(131072), c_int(768)); (m, n, k): (c_int(24), c_int(4096), c_int(4096))
Traceback (most recent call last):
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/gradio/routes.py", line 384, in run_predict
output = await app.get_blocks().process_api(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/gradio/blocks.py", line 1020, in process_api
result = await self.call_function(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/gradio/blocks.py", line 844, in call_function
prediction = await anyio.to_thread.run_sync(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/anyio/to_thread.py", line 31, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
return await future
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 867, in run
result = context.run(func, *args)
File "/home/touhi/Desktop/llm/Alpaca-LoRA-Serve/app.py", line 58, in chat_batch
bot_responses = get_output_batch(
File "/home/touhi/Desktop/llm/Alpaca-LoRA-Serve/gen.py", line 22, in get_output_batch
generated_id = model.generate(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/peft/peft_model.py", line 581, in generate
outputs = self.base_model.generate(**kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/generation/utils.py", line 1405, in generate
return self.greedy_search(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/generation/utils.py", line 2200, in greedy_search
outputs = self(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 765, in forward
outputs = self.model(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 614, in forward
layer_outputs = decoder_layer(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 309, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 209, in forward
query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/peft/tuners/lora.py", line 522, in forward
result = super().forward(x)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/bitsandbytes/nn/modules.py", line 242, in forward
out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/bitsandbytes/autograd/_functions.py", line 488, in matmul
return MatMul8bitLt.apply(A, B, out, bias, state)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/bitsandbytes/autograd/_functions.py", line 377, in forward
out32, Sout32 = F.igemmlt(C32A, state.CxB, SA, state.SB)
File "/home/touhi/anaconda3/envs/alpaca/lib/python3.9/site-packages/bitsandbytes/functional.py", line 1410, in igemmlt
raise Exception('cublasLt ran into an error!')
Exception: cublasLt ran into an error!
error detected
The text was updated successfully, but these errors were encountered: