Zhwang/debug tp8 #34

Open. Wants to merge 80 commits into base branch corvo.

Commits (80):
743369a  Merge with main (#1)  (sfc-gh-ashankar, Jul 11, 2023)
da9ef99  commit  (sfc-gh-zhwang, Oct 31, 2023)
7f1e8bf  commit  (sfc-gh-zhwang, Oct 31, 2023)
bb6fce4  commit  (sfc-gh-zhwang, Oct 31, 2023)
49c94e8  commit  (sfc-gh-zhwang, Oct 31, 2023)
c510c26  commit  (sfc-gh-zhwang, Oct 31, 2023)
8933482  commit  (sfc-gh-zhwang, Oct 31, 2023)
98ab7df  commit  (sfc-gh-zhwang, Oct 31, 2023)
626287a  commit  (sfc-gh-zhwang, Oct 31, 2023)
787c1c5  commit  (sfc-gh-zhwang, Oct 31, 2023)
06e941b  commit  (sfc-gh-zhwang, Oct 31, 2023)
ce8272a  commit  (sfc-gh-zhwang, Oct 31, 2023)
c6f2543  commit  (sfc-gh-zhwang, Oct 31, 2023)
e1f2a76  commit  (sfc-gh-zhwang, Oct 31, 2023)
441c343  commit  (sfc-gh-zhwang, Oct 31, 2023)
3cf5490  commit  (sfc-gh-zhwang, Oct 31, 2023)
38919b6  commit  (sfc-gh-zhwang, Oct 31, 2023)
4c0dbba  commit  (sfc-gh-zhwang, Oct 31, 2023)
b6945af  commit  (sfc-gh-zhwang, Oct 31, 2023)
728f890  commit  (sfc-gh-zhwang, Oct 31, 2023)
8aeb13a  commit  (sfc-gh-zhwang, Oct 31, 2023)
ffd2f96  commit  (sfc-gh-zhwang, Oct 31, 2023)
18eb7b4  commit  (sfc-gh-zhwang, Oct 31, 2023)
28cba07  commit  (sfc-gh-zhwang, Oct 31, 2023)
165704c  commit  (sfc-gh-zhwang, Oct 31, 2023)
d792097  commit  (sfc-gh-zhwang, Oct 31, 2023)
f2534be  commit  (sfc-gh-zhwang, Oct 31, 2023)
67aa284  commit  (sfc-gh-zhwang, Oct 31, 2023)
b415055  commit  (sfc-gh-zhwang, Oct 31, 2023)
2ecae5e  commit  (sfc-gh-zhwang, Oct 31, 2023)
455c6b8  commit  (sfc-gh-zhwang, Oct 31, 2023)
9ecbfc9  commit  (sfc-gh-zhwang, Oct 31, 2023)
cb8b38d  commit  (sfc-gh-zhwang, Oct 31, 2023)
662d605  commit  (sfc-gh-zhwang, Oct 31, 2023)
ec65161  commit  (sfc-gh-zhwang, Oct 31, 2023)
d2c0e8f  commit  (sfc-gh-zhwang, Oct 31, 2023)
7afad53  commit  (sfc-gh-zhwang, Oct 31, 2023)
2804459  commit  (sfc-gh-zhwang, Oct 31, 2023)
3e431f9  commit  (sfc-gh-zhwang, Oct 31, 2023)
748aa0c  commit  (sfc-gh-zhwang, Oct 31, 2023)
1f4210e  commit  (sfc-gh-zhwang, Oct 31, 2023)
f1dac5c  commit  (sfc-gh-zhwang, Oct 31, 2023)
40d7e8b  commit  (sfc-gh-zhwang, Oct 31, 2023)
dd09060  commit  (sfc-gh-zhwang, Oct 31, 2023)
f8f43a4  commit  (sfc-gh-zhwang, Oct 31, 2023)
7bfb518  commit  (sfc-gh-zhwang, Oct 31, 2023)
cfeec21  commit  (sfc-gh-zhwang, Oct 31, 2023)
dadcc23  commit  (sfc-gh-zhwang, Oct 31, 2023)
99ffd0e  commit  (sfc-gh-zhwang, Oct 31, 2023)
a89d4b3  commit  (sfc-gh-zhwang, Oct 31, 2023)
4973c9b  commit  (sfc-gh-zhwang, Oct 31, 2023)
caf7c00  commit  (sfc-gh-zhwang, Oct 31, 2023)
f7af369  commit  (sfc-gh-zhwang, Oct 31, 2023)
4545f61  commit  (sfc-gh-zhwang, Oct 31, 2023)
376110d  commit  (sfc-gh-zhwang, Oct 31, 2023)
74df027  commit  (sfc-gh-zhwang, Oct 31, 2023)
eaa0a17  commit  (sfc-gh-zhwang, Oct 31, 2023)
580a796  commit  (sfc-gh-zhwang, Oct 31, 2023)
9255f7c  commit  (sfc-gh-zhwang, Oct 31, 2023)
debacbd  commit  (sfc-gh-zhwang, Oct 31, 2023)
62e4177  commit  (sfc-gh-zhwang, Oct 31, 2023)
4f14e32  commit  (sfc-gh-zhwang, Oct 31, 2023)
34b48e8  commit  (sfc-gh-zhwang, Oct 31, 2023)
596f6d9  commit  (sfc-gh-zhwang, Oct 31, 2023)
5772f09  commit  (sfc-gh-zhwang, Oct 31, 2023)
96ccec9  commit  (sfc-gh-zhwang, Oct 31, 2023)
04f5ab2  commit  (sfc-gh-zhwang, Oct 31, 2023)
c79afa9  commit  (sfc-gh-zhwang, Oct 31, 2023)
dbd5287  commit  (sfc-gh-zhwang, Oct 31, 2023)
599e8da  commit  (sfc-gh-zhwang, Oct 31, 2023)
59f2c93  commit  (sfc-gh-zhwang, Oct 31, 2023)
f330f2e  commit  (sfc-gh-zhwang, Oct 31, 2023)
3ef5d24  commit  (sfc-gh-zhwang, Oct 31, 2023)
8e57eb5  commit  (sfc-gh-zhwang, Oct 31, 2023)
3e50243  commit  (sfc-gh-zhwang, Oct 31, 2023)
87cfd58  commit  (sfc-gh-zhwang, Oct 31, 2023)
09b5f45  commit  (sfc-gh-zhwang, Oct 31, 2023)
407a868  commit  (sfc-gh-zhwang, Oct 31, 2023)
6451b5f  Merge branch 'corvo', remote-tracking branch 'origin' into zhwang/deb…  (sfc-gh-zhwang, Oct 31, 2023)
2d7be1a  commit  (sfc-gh-zhwang, Oct 31, 2023)
src/fastertransformer/models/llama/Llama.cc: 22 changes (1 addition, 21 deletions)
@@ -140,8 +140,6 @@ void Llama<T>::allocateBuffer(
     // prompt_learning weight batch ptrs
     prompt_learning_weight_batch_ =
         (const T**)(allocator_->reMalloc(prompt_learning_weight_batch_, sizeof(T*) * batchxbeam, false));
-    tiled_prompt_lengths_buf_ =
-        (int*)(allocator_->reMalloc(tiled_prompt_lengths_buf_, sizeof(int) * batchxbeam, true));
 
     tiled_input_ids_buf_ =
         (int*)(allocator_->reMalloc(tiled_input_ids_buf_, sizeof(int) * batchxbeam * max_input_len, true));
@@ -204,7 +202,6 @@ void Llama<T>::freeBuffer()
     }
 
     allocator_->free((void**)(&prompt_learning_weight_batch_));
-    allocator_->free((void**)(&tiled_prompt_lengths_buf_));
 
     allocator_->free((void**)(&tiled_input_ids_buf_));
     allocator_->free((void**)(&tiled_input_lengths_buf_));
@@ -639,22 +636,6 @@ void Llama<T>::forward(std::unordered_map<std::string, Tensor>* output_tensors,
         sync_check_cuda_error();
     }
 
-    // Prefix prompts
-    if (has_prefix_prompt_) {
-        cudaMemcpyAsync(prompt_learning_weight_batch_,
-                        prefix_prompt_weight_batch_ptrs.data(),
-                        sizeof(T*) * batch_size * beam_width,
-                        cudaMemcpyDefault,
-                        stream_);
-        cudaMemcpyAsync(tiled_prompt_lengths_buf_,
-                        prefix_prompt_lengths.data(),
-                        sizeof(int) * batch_size * beam_width,
-                        cudaMemcpyDefault,
-                        stream_);
-    }
-
-    sync_check_cuda_error();
-
     // handle first step
     if (has_prefix_prompt_ || has_prefix_soft_prompt_ || max_input_length > 1) {
         invokeTileGptInputs(tiled_input_ids_buf_,
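For context, the deleted block staged the per-sequence prefix pointers and lengths on the device with asynchronous copies on the model's CUDA stream. Below is a minimal, self-contained sketch of that staging pattern; the names (h_prefix_lengths, d_prefix_lengths, kBatchxBeam) are illustrative, not FasterTransformer identifiers.

#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

int main() {
    const int kBatchxBeam = 4;
    // Host-side per-sequence prefix lengths, analogous to prefix_prompt_lengths.
    std::vector<int> h_prefix_lengths(kBatchxBeam, 8);

    cudaStream_t stream;
    cudaStreamCreate(&stream);

    int* d_prefix_lengths = nullptr;
    cudaMalloc(&d_prefix_lengths, sizeof(int) * kBatchxBeam);

    // Asynchronous copy enqueued on the stream; cudaMemcpyDefault lets the
    // runtime infer the direction from the pointer values (requires unified
    // virtual addressing, standard on modern 64-bit CUDA platforms).
    cudaMemcpyAsync(d_prefix_lengths, h_prefix_lengths.data(),
                    sizeof(int) * kBatchxBeam, cudaMemcpyDefault, stream);

    // The host buffer must outlive the copy; kernels launched on the same
    // stream afterwards observe the staged values.
    cudaStreamSynchronize(stream);

    printf("staged %d prefix lengths on the device\n", kBatchxBeam);
    cudaFree(d_prefix_lengths);
    cudaStreamDestroy(stream);
    return 0;
}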
@@ -707,7 +688,7 @@ void Llama<T>::forward(std::unordered_map<std::string, Tensor>* output_tensors,
 
     invokeBuildDecoderAttentionMask(input_attention_mask_,
                                     tiled_input_lengths_buf_,
-                                    tiled_prompt_lengths_buf_,
+                                    (const int*)nullptr,  // prefix_prompt_lengths
                                     batch_size * beam_width,
                                     max_input_length,
                                     max_prefix_prompt_length,
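The one-line change above passes a null pointer where the per-sequence prefix lengths used to go, so the mask builder falls back to a plain causal mask over the padded inputs. The kernel below sketches that technique under assumed conventions (a [batch, max_input_len, max_prefix_len + max_input_len] mask with 1.0 marking visible keys, prefix keys stored first); it is an illustration, not the actual invokeBuildDecoderAttentionMask.

#include <cuda_runtime.h>

// One block per sequence. For query position q and key position k, the mask
// entry is 1 when the key is visible: the whole prefix is visible, and input
// keys are visible causally. A null prefix_lengths means "no prefix".
__global__ void build_decoder_attention_mask(float* mask,
                                             const int* input_lengths,
                                             const int* prefix_lengths,  // may be nullptr
                                             int max_input_len,
                                             int max_prefix_len) {
    const int b = blockIdx.x;
    const int len = input_lengths[b];
    const int prefix = (prefix_lengths != nullptr) ? prefix_lengths[b] : 0;
    const int row_width = max_prefix_len + max_input_len;
    float* m = mask + (size_t)b * max_input_len * row_width;

    for (int i = threadIdx.x; i < max_input_len * row_width; i += blockDim.x) {
        const int q = i / row_width;                  // query (input) position
        const int k = i % row_width;                  // key position
        bool visible = false;
        if (q < len) {
            if (k < max_prefix_len) {
                visible = (k < prefix);               // prefix region
            } else {
                const int kk = k - max_prefix_len;
                visible = (kk <= q) && (kk < len);    // causal over real inputs
            }
        }
        m[i] = visible ? 1.f : 0.f;
    }
}

int main() {
    const int batch = 2, max_input_len = 4, max_prefix_len = 2;
    const int row_width = max_prefix_len + max_input_len;
    int h_len[batch] = {4, 2};
    int* d_len;
    float* d_mask;
    cudaMalloc(&d_len, sizeof(h_len));
    cudaMalloc(&d_mask, sizeof(float) * batch * max_input_len * row_width);
    cudaMemcpy(d_len, h_len, sizeof(h_len), cudaMemcpyHostToDevice);
    // Null prefix lengths, mirroring the change above: pure causal masking.
    build_decoder_attention_mask<<<batch, 128>>>(d_mask, d_len, nullptr,
                                                 max_input_len, max_prefix_len);
    cudaDeviceSynchronize();
    cudaFree(d_len);
    cudaFree(d_mask);
    return 0;
}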
@@ -838,7 +819,6 @@ void Llama<T>::forward(std::unordered_map<std::string, Tensor>* output_tensors,
 
     invokeMaskPaddingTokens(masked_tokens_,
                             input_tensors->at("input_lengths").getPtr<const int>(),  // not_tiled
-                            tiled_prompt_lengths_buf_,
                             max_cache_seq_len,
                             max_input_length + max_prefix_prompt_length,
                             0,
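The final hunk likewise stops feeding prefix lengths into the padding-token mask, leaving only the untiled input lengths. As a rough sketch of the technique, assuming the mask flags cache slots between a sequence's valid prompt and the batch-wide prompt width so later steps can ignore them, a kernel might look like this (illustrative, not the actual invokeMaskPaddingTokens):

#include <cuda_runtime.h>

// One block per mask row. Positions in [valid, max_prompt_len) are padding;
// generated tokens start at max_prompt_len and are never masked here.
// Launch sketch: mask_padding_tokens<<<num_rows, 256>>>(masked, lengths,
//                                                       max_cache_seq_len,
//                                                       max_prompt_len);
__global__ void mask_padding_tokens(bool* masked,
                                    const int* input_lengths,
                                    int max_cache_seq_len,
                                    int max_prompt_len) {
    const int b = blockIdx.x;
    const int valid = input_lengths[b];
    bool* row = masked + (size_t)b * max_cache_seq_len;
    for (int t = threadIdx.x; t < max_cache_seq_len; t += blockDim.x) {
        row[t] = (t >= valid) && (t < max_prompt_len);
    }
}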