Skip to content

Commit

Permalink
fix bugs in moe (#41903)
Browse files Browse the repository at this point in the history
* fix moe apis (#41650)

* Moe ref (#41836)

* moe ref

* ref commit

* update; document_fix

* update;document_fix

* Moe ref (#41864)

* moe ref

* ref commit; document_fix

* update; document_fix

* update document_fix

* update; document_fix
  • Loading branch information
sljlp authored Apr 18, 2022
1 parent 3a2fb4c commit f92dbfb
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 6 deletions.
11 changes: 10 additions & 1 deletion paddle/fluid/operators/assign_pos_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,16 @@ Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License.
The file has been adapted from the two files:
https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cu
https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cuh
Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
We retain the following license from the original files:
Copyright 2021, Jiaao He
Licensed under the Apache License, Version 2.0 (the "License").
*/

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/assign_pos_op.h"
Expand Down
8 changes: 8 additions & 0 deletions paddle/fluid/operators/limit_by_capacity_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// The file has been adapted from the two files:
// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cu
// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cuh
// Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
// We retain the following license from the original files:
// Copyright 2021, Jiaao He. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License").

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/limit_by_capacity_op.h"
Expand Down
8 changes: 8 additions & 0 deletions paddle/fluid/operators/number_count_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// The file has been adapted from the two files:
// https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cu
// https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cuh
// Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
// We retain the following license from the original files:
// Copyright 2021, Jiaao He. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License").

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/number_count_op.h"
Expand Down
8 changes: 8 additions & 0 deletions paddle/fluid/operators/prune_gate_by_capacity_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// The file has been adapted from the two files:
// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cu
// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cuh
// Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
// We retain the following license from the original files:
// Copyright 2021, Jiaao He. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License").

#include "paddle/fluid/operators/prune_gate_by_capacity_op.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# The file has been adapted from the file:
# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/base_gate.py
# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
# We retain the following license from the original file:
# Copyright 2021, Jiaao He. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License").

import paddle.nn as nn

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# The file has been adapted from the file:
# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/gshard_gate.py
# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
# We retain the following license from the original file:
# Copyright 2021, Jiaao He. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License").

import math
import paddle
Expand Down Expand Up @@ -62,6 +69,6 @@ def forward(self, x):
if self.random_routing:
rand_routing_prob = paddle.rand(
shape=[gate_score.shape[0]], dtype="float32")
topk_idx = paddle.distributed.utils.random_routing(
topk_idx = paddle.distributed.models.moe.utils._random_routing(
topk_idx, topk_val, rand_routing_prob)
return topk_val, topk_idx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -11,6 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# The file has been adapted from the file:
# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/naive_gate.py
# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
# We retain the following license from the original file:
# Copyright 2021, Jiaao He. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License").

from .base_gate import BaseGate

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -11,6 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# The file has been adapted from the file:
# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/switch_gate.py
# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
# We retain the following license from the original file:
# Copyright 2021, Jiaao He. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License").

import math
import paddle
Expand Down
5 changes: 5 additions & 0 deletions python/paddle/incubate/distributed/models/moe/grad_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase):
``need_clip`` of ``ClipGradByGlobalNorm`` HAS BEEN DEPRECATED since 2.0.
Please use ``need_clip`` in ``ParamAttr`` to specify the clip scope.
Reference:
https://github.com/laekov/fastmoe/blob/master/examples/megatron/clip-grad-v2.2.patch
Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
Args:
clip_norm (float): The maximum norm value.
is_expert_param_func (function): a function to decide whether a param should be put into moe_params_grads
Expand Down
7 changes: 7 additions & 0 deletions python/paddle/incubate/distributed/models/moe/moe_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# The file has been adapted from the file:
# https://github.com/laekov/fastmoe/blob/master/fmoe/layers.py
# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
# We retain the following license from the original file:
# Copyright 2021, Jiaao He. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License").

import collections
import math
Expand Down
13 changes: 11 additions & 2 deletions python/paddle/incubate/distributed/models/moe/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -11,7 +11,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.distributed.models.moe.utils import *
#
# The file has been adapted from the file:
# https://github.com/laekov/fastmoe/blob/master/fmoe/functions.py
# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4
# We retain the following license from the original file:
# Copyright 2021, Jiaao He. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License").

from paddle.distributed.models.moe.utils import _number_count, _limit_by_capacity, _prune_gate_by_capacity, _assign_pos
import paddle


def _alltoall(in_tensor_list, group=None, use_calc_stream=True):
Expand Down

0 comments on commit f92dbfb

Please sign in to comment.