Commit
[GPU] Initial RoPE version for ChatGLM
Lyamin-Roman committed Jan 25, 2024
1 parent 639c155 commit 3c418e1
Showing 18 changed files with 5,330 additions and 1 deletion.
104 changes: 104 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/op/rope.hpp
@@ -0,0 +1,104 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/node.hpp"
#include "openvino/op/op.hpp"

namespace ov {
namespace intel_gpu {
namespace op {

/**
* The operation performs the rotary positional embedding described in:
* "RoFormer: Enhanced Transformer with Rotary Position Embedding" by Jianlin Su et al.
*
* The core computation applies a 2x2 rotation matrix to each pair of input
* states x[i0] & x[i1] to produce the rotary-embedded pair of output
* states y[i0] and y[i1]:
*
* Suppose the dimension of the hidden states (of each attention head) is N,
* of which d are to be embedded (d <= N); the non-embedded part is copied
* into the output unchanged.
*
* for i in 0...(d/2)
* if (is_interleaved) {
* // interleaving style of indexing
* i0 = i*2
* i1 = i*2 + 1
* } else {
* // rotate-half style of indexing
* i0 = i
* i1 = i + (d/2)
* }
* y[i0] = x[i0]*cos(m * theta[i]) - x[i1]*sin(m * theta[i])
* y[i1] = x[i1]*cos(m * theta[i]) + x[i0]*sin(m * theta[i])
* Note: m is the token position of the current input.
*
* Depending on the configuration, additional preprocessing steps may be performed as well:
* - slicing the last dimension of the input tensor
* (when q/k/v are merged and only the q or k part is to be extracted & embedded)
* - transposing the input tensor
* (when q/k coming from a fully-connected layer has layout [batch, seq_len, head_cnt, head_dim]
* but the output of RoPE is required to have layout [batch, head_cnt, seq_length, head_dims])
* - gathering sin/cos from input tensors 2 & 3 using the position index tensor passed as input 4
*
* Inputs:
* 1. Input hidden states tensor of type T1 - shape:
* [batch, seq_length, head_cnt, head_dims] when input_trans0213 == false OR
* [batch, head_cnt, seq_length, head_dims] when input_trans0213 == true
* 2. Pre-calculated cos(m*theta[n]) tensor of type T2 - shape [1, 1, max_position_embeddings, d].
* 3. Pre-calculated sin(m*theta[n]) tensor of type T2 - shape [1, 1, max_position_embeddings, d].
* Input 3 is combined with input 2 when is_interleaved is true.
* 4. Position index tensor of type T3 - shape [batch, 1, seq_length, 1 or d] OR [batch, seq_length] (optional)
* Outputs:
* 1. New embedding tensor of type T1 and of shape [batch, head_cnt, seq_length, head_dims]
* Types:
* T1 - FP32 or BF16
* T2 - FP32
* T3 - I32
*/
class RoPE : public ov::op::Op {
public:
OPENVINO_OP("RoPE", "gpu_opset");

RoPE() = default;

struct Config {
size_t slice_start = 0; // slice inner-most dimensions of input
size_t slice_stop = 0;
bool input_trans0213 = false; // transpose input dim 1&2
bool is_interleaved = false; // interleaved mode, implies trans0213 happens after RoPE
size_t rotary_ndims = 0; // dimensions to be embedded (d in the description)
bool is_chatglm = false; // ChatGLM is a special case that overrides the other settings
bool is_qwen = false; // Qwen is a special case that overrides the other settings
size_t head_cnt = 0;
size_t head_size = 0;
int gather_position_arg_id = 0; // arg id of the position tensor;
// == 3 when gathering from the sin/cos inputs by position is required
};

RoPE(const OutputVector& args, const Config& cfg);

bool visit_attributes(ov::AttributeVisitor& visitor) override;

void validate_and_infer_types() override;

std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;

const Config& get_config() const {
return m_config;
}

Config& get_config() {
return m_config;
}

private:
Config m_config;
};

} // namespace op
} // namespace intel_gpu
} // namespace ov
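To make the pseudocode in the header comment concrete, here is a minimal reference sketch of the core rotation for a single token at position m and a single head. It is illustrative only: rope_reference, its signature, and the assumption that the cos/sin values are already gathered per position into length-d/2 arrays are not part of this commit.

#include <cstddef>

// Reference rotation for one head: x and y hold N values, of which the first d
// (rotary_ndims) are embedded; cos_m[i] = cos(m * theta[i]), sin_m[i] = sin(m * theta[i]).
void rope_reference(const float* x, float* y, std::size_t N, std::size_t d,
                    const float* cos_m, const float* sin_m, bool is_interleaved) {
    for (std::size_t i = 0; i < d / 2; ++i) {
        // interleaving style vs. rotate-half style of indexing
        const std::size_t i0 = is_interleaved ? 2 * i : i;
        const std::size_t i1 = is_interleaved ? 2 * i + 1 : i + d / 2;
        y[i0] = x[i0] * cos_m[i] - x[i1] * sin_m[i];
        y[i1] = x[i1] * cos_m[i] + x[i0] * sin_m[i];
    }
    // the non-embedded part is copied into the output unchanged
    for (std::size_t k = d; k < N; ++k) {
        y[k] = x[k];
    }
}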
@@ -275,3 +275,4 @@ REGISTER_FACTORY(internal, RMS);
REGISTER_FACTORY(internal, GatherCompressed);
REGISTER_FACTORY(internal, KVCache);
REGISTER_FACTORY(internal, ReadValue);
REGISTER_FACTORY(internal, RoPE);
66 changes: 66 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/primitives/rope.hpp
@@ -0,0 +1,66 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include "primitive.hpp"
#include "intel_gpu/op/rope.hpp"

namespace cldnn {
using RoPE = ov::intel_gpu::op::RoPE;

/// @brief Rotary Position Embedding primitive
struct rope : public primitive_base<rope> {
CLDNN_DECLARE_PRIMITIVE(rope);

rope() : primitive_base("", {}) {}

/// @brief Constructs rope primitive
/// @param id This primitive id
/// @param inputs Input primitive ids
/// @param config RoPE configuration
/// @param output_padding Optional output padding
rope(const primitive_id& id,
const std::vector<input_info>& inputs,
const RoPE::Config& config,
const padding& output_padding = padding())
: primitive_base(id, inputs, {output_padding}),
config(config) {}

/// @brief RoPE configuration
RoPE::Config config;

size_t hash() const override {
size_t seed = primitive::hash();
seed = hash_combine(seed, config.gather_position_arg_id);
seed = hash_combine(seed, config.head_cnt);
seed = hash_combine(seed, config.head_size);
seed = hash_combine(seed, config.input_trans0213);
seed = hash_combine(seed, config.is_chatglm);
seed = hash_combine(seed, config.is_interleaved);
seed = hash_combine(seed, config.is_qwen);
seed = hash_combine(seed, config.rotary_ndims);
seed = hash_combine(seed, config.slice_start);
seed = hash_combine(seed, config.slice_stop);
return seed;
}

bool operator==(const primitive& rhs) const override {
if (!compare_common_params(rhs))
return false;

auto rhs_casted = downcast<const rope>(rhs);

return config.gather_position_arg_id == rhs_casted.config.gather_position_arg_id; // TODO: compare the remaining Config fields
}

void save(BinaryOutputBuffer& ob) const override {
primitive_base<rope>::save(ob);
ob << config.gather_position_arg_id; // TODO: serialize the remaining Config fields
}

void load(BinaryInputBuffer& ib) override {
primitive_base<rope>::load(ib);
ib >> config.gather_position_arg_id; // TODO: deserialize the remaining Config fields
}
};
} // namespace cldnn
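As a usage illustration (not code from this commit), creating the primitive with a ChatGLM-style config could look like the following; the primitive id, input names, config values, and the topology variable are hypothetical:

ov::intel_gpu::op::RoPE::Config cfg;
cfg.is_chatglm = true;    // ChatGLM path overrides the other settings
cfg.rotary_ndims = 64;    // d in the op description
cfg.head_cnt = 32;
cfg.head_size = 128;

// two inputs: the merged qkv projection and the fused cos/sin cache
cldnn::rope rope_prim("rope0",
                      {cldnn::input_info("qkv_proj"), cldnn::input_info("cos_sin_cache")},
                      cfg);
topology.add(rope_prim);  // topology is an existing cldnn::topology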
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp
@@ -29,6 +29,7 @@ void register_implementations() {
REGISTER_CPU(broadcast);
REGISTER_CPU(tile);
REGISTER_CPU(select);
REGISTER_CPU(rope);
}

} // namespace cpu
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp
@@ -22,6 +22,7 @@
#include "intel_gpu/primitives/broadcast.hpp"
#include "intel_gpu/primitives/tile.hpp"
#include "intel_gpu/primitives/select.hpp"
#include "intel_gpu/primitives/rope.hpp"

namespace cldnn {
namespace cpu {
@@ -53,6 +54,7 @@ REGISTER_CPU(reorder);
REGISTER_CPU(broadcast);
REGISTER_CPU(tile);
REGISTER_CPU(select);
REGISTER_CPU(rope);

#undef REGISTER_CPU

