[GPU] Initial RoPE version for ChatGLM
1 parent 639c155 · commit 3c418e1
Showing 18 changed files with 5,330 additions and 1 deletion.
@@ -0,0 +1,104 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/node.hpp"
#include "openvino/op/op.hpp"

namespace ov {
namespace intel_gpu {
namespace op {

/**
 * The operation performs the rotary positional embedding described in:
 * ROFORMER: ENHANCED TRANSFORMER WITH ROTARY POSITION EMBEDDING, by Jianlin Su
 *
 * The core computation applies a 2x2 rotation matrix to a pair of input
 * states x[i0] & x[i1] to produce the rotary-embedded pair of output
 * states y[i0] and y[i1].
 *
 * Suppose the hidden-state dimension (of each attention head) is N, of which d
 * elements are to be embedded (d <= N); the non-embedded part is copied into the output.
 *
 * for i in 0...(d/2)
 *     if (is_interleaved) {
 *         // interleaving style of indexing
 *         i0 = i*2
 *         i1 = i*2 + 1
 *     } else {
 *         // rotate-half style of indexing
 *         i0 = i
 *         i1 = i + (d/2)
 *     }
 *     y[i0] = x[i0]*cos(m * xita[i]) - x[i1]*sin(m * xita[i])
 *     y[i1] = x[i1]*cos(m * xita[i]) + x[i0]*sin(m * xita[i])
 * Note: m is the token position of the current input.
 *
 * Depending on the configuration, additional preprocessing steps may be performed as well:
 *  - slicing the last dimension of the input tensor
 *    (when q/k/v are merged and only the q or k part is to be extracted & embedded)
 *  - transposing the input tensor
 *    (when q/k comes from a fully-connected layer with layout [batch, seq_len, head_cnt, head_dim]
 *     but the RoPE output is required to have layout [batch, head_cnt, seq_length, head_dims])
 *  - gathering sin/cos from inputs 2 & 3 using the position index tensor passed through input 4
 *
 * Inputs:
 *  1. Input hidden states tensor of type T1 - shape:
 *     [batch, seq_length, head_cnt, head_dims] when input_trans0213 == false OR
 *     [batch, head_cnt, seq_length, head_dims] when input_trans0213 == true
 *  2. Pre-calculated cos(m*xita[n]) tensor of type T2 - shape [1, 1, max_position_embeddings, d].
 *  3. Pre-calculated sin(m*xita[n]) tensor of type T2 - shape [1, 1, max_position_embeddings, d].
 *     Input 3 is combined with input 2 when is_interleaved is true.
 *  4. Position index tensor of type T3 - shape [batch, 1, seq_length, 1 or d] OR [batch, seq_length] (optional)
 * Outputs:
 *  1. New embedding tensor of type T1 and of shape [batch, head_cnt, seq_length, head_dims]
 * Types:
 *  T1 - FP32 or BF16
 *  T2 - FP32
 *  T3 - I32
 */
class RoPE : public ov::op::Op {
public:
    OPENVINO_OP("RoPE", "gpu_opset");

    RoPE() = default;

    struct Config {
        size_t slice_start = 0;            // slice the inner-most dimension of the input
        size_t slice_stop = 0;
        bool input_trans0213 = false;      // transpose input dims 1 & 2
        bool is_interleaved = false;       // interleaved mode, implies trans0213 happens after RoPE
        size_t rotary_ndims = 0;           // number of dimensions to be embedded (d in the description)
        bool is_chatglm = false;           // ChatGLM is special and overrides the other settings
        bool is_qwen = false;              // Qwen is special and overrides the other settings
        size_t head_cnt = 0;
        size_t head_size = 0;
        int gather_position_arg_id = 0;    // arg id of the position tensor;
                                           // == 3 when gathering from the sin/cos inputs by position is required
    };

    RoPE(const OutputVector& args, const Config& cfg);

    bool visit_attributes(ov::AttributeVisitor& visitor) override;

    void validate_and_infer_types() override;

    std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;

    const Config& get_config() const {
        return m_config;
    }

    Config& get_config() {
        return m_config;
    }

private:
    Config m_config;
};

}  // namespace op
}  // namespace intel_gpu
}  // namespace ov
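
The doc comment above pins down the per-pair rotation exactly, so a small scalar reference makes the indexing easy to check against the GPU kernel later. The sketch below is not part of the commit; it is a minimal host-side illustration of the rotate-half and interleaved indexing, with made-up names (rope_reference, cos_m, sin_m), and it assumes cos(m * xita[i]) and sin(m * xita[i]) have already been gathered for the current token position m.

#include <cassert>
#include <cstddef>
#include <vector>

// Scalar reference of the rotation described in the RoPE doc comment.
// x, y   : hidden states of one attention head, length N (head_size).
// cos_m  : cos(m * xita[i]) for the current token position m, length d/2.
// sin_m  : sin(m * xita[i]) for the current token position m, length d/2.
// d      : rotary_ndims, d <= N; the tail [d, N) is copied unchanged.
static void rope_reference(const std::vector<float>& x,
                           std::vector<float>& y,
                           const std::vector<float>& cos_m,
                           const std::vector<float>& sin_m,
                           size_t d,
                           bool is_interleaved) {
    const size_t N = x.size();
    assert(d % 2 == 0 && d <= N && y.size() == N);
    for (size_t i = 0; i < d / 2; ++i) {
        size_t i0, i1;
        if (is_interleaved) {   // interleaving style of indexing
            i0 = 2 * i;
            i1 = 2 * i + 1;
        } else {                // rotate-half style of indexing
            i0 = i;
            i1 = i + d / 2;
        }
        y[i0] = x[i0] * cos_m[i] - x[i1] * sin_m[i];
        y[i1] = x[i1] * cos_m[i] + x[i0] * sin_m[i];
    }
    for (size_t i = d; i < N; ++i) {  // non-embedded part is copied into the output
        y[i] = x[i];
    }
}

With the Config above, is_interleaved selects between the two index branches and rotary_ndims plays the role of d; judging from the comment, the is_chatglm / is_qwen overrides concern how the merged q/k/v input and the sin/cos table are sliced and laid out rather than this core rotation.
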
src/plugins/intel_gpu/include/intel_gpu/primitives/rope.hpp (66 additions, 0 deletions)
@@ -0,0 +1,66 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include "primitive.hpp"
#include "intel_gpu/op/rope.hpp"

namespace cldnn {
using RoPE = ov::intel_gpu::op::RoPE;

/// @brief Rotary Position Embedding primitive
struct rope : public primitive_base<rope> {
    CLDNN_DECLARE_PRIMITIVE(rope);

    rope() : primitive_base("", {}) {}

    /// @brief Constructs rope primitive
    /// @param id This primitive id
    /// @param inputs Input primitive ids
    /// @param config RoPE configuration
    rope(const primitive_id& id,
         const std::vector<input_info>& inputs,
         const RoPE::Config& config,
         const padding& output_padding = padding())
        : primitive_base(id, inputs, {output_padding}),
          config(config) {}

    /// @brief RoPE configuration
    RoPE::Config config;

    size_t hash() const override {
        size_t seed = primitive::hash();
        seed = hash_combine(seed, config.gather_position_arg_id);
        seed = hash_combine(seed, config.head_cnt);
        seed = hash_combine(seed, config.head_size);
        seed = hash_combine(seed, config.input_trans0213);
        seed = hash_combine(seed, config.is_chatglm);
        seed = hash_combine(seed, config.is_interleaved);
        seed = hash_combine(seed, config.is_qwen);
        seed = hash_combine(seed, config.rotary_ndims);
        seed = hash_combine(seed, config.slice_start);
        seed = hash_combine(seed, config.slice_stop);
        return seed;
    }

    bool operator==(const primitive& rhs) const override {
        if (!compare_common_params(rhs))
            return false;

        auto rhs_casted = downcast<const rope>(rhs);

        return config.gather_position_arg_id == rhs_casted.config.gather_position_arg_id;  // TODO: compare the remaining config fields
    }

    void save(BinaryOutputBuffer& ob) const override {
        primitive_base<rope>::save(ob);
        ob << config.gather_position_arg_id;  // TODO: serialize the remaining config fields
    }

    void load(BinaryInputBuffer& ib) override {
        primitive_base<rope>::load(ib);
        ib >> config.gather_position_arg_id;  // TODO: deserialize the remaining config fields
    }
};
}  // namespace cldnn
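
The three TODO comments above only cover gather_position_arg_id, while hash() already folds in every Config field. A natural completion is to compare and serialize the full Config in the same way. The following is an editor's sketch rather than code from the commit, and it assumes the BinaryOutputBuffer / BinaryInputBuffer stream operators accept these bool, size_t and int fields directly:

// Possible completion of the TODOs: cover every Config field, mirroring hash().
bool operator==(const primitive& rhs) const override {
    if (!compare_common_params(rhs))
        return false;

    auto rhs_casted = downcast<const rope>(rhs);
    return config.gather_position_arg_id == rhs_casted.config.gather_position_arg_id &&
           config.head_cnt == rhs_casted.config.head_cnt &&
           config.head_size == rhs_casted.config.head_size &&
           config.input_trans0213 == rhs_casted.config.input_trans0213 &&
           config.is_chatglm == rhs_casted.config.is_chatglm &&
           config.is_interleaved == rhs_casted.config.is_interleaved &&
           config.is_qwen == rhs_casted.config.is_qwen &&
           config.rotary_ndims == rhs_casted.config.rotary_ndims &&
           config.slice_start == rhs_casted.config.slice_start &&
           config.slice_stop == rhs_casted.config.slice_stop;
}

void save(BinaryOutputBuffer& ob) const override {
    primitive_base<rope>::save(ob);
    ob << config.slice_start << config.slice_stop << config.input_trans0213
       << config.is_interleaved << config.rotary_ndims << config.is_chatglm
       << config.is_qwen << config.head_cnt << config.head_size
       << config.gather_position_arg_id;
}

void load(BinaryInputBuffer& ib) override {
    primitive_base<rope>::load(ib);
    ib >> config.slice_start >> config.slice_stop >> config.input_trans0213
       >> config.is_interleaved >> config.rotary_ndims >> config.is_chatglm
       >> config.is_qwen >> config.head_cnt >> config.head_size
       >> config.gather_position_arg_id;
}

Keeping the field order identical in save() and load() is what makes the serialization round trip consistent.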