fused_linear_param_grad infer spmd (PaddlePaddle#60144)
* polish
* fused_linear_param_grad_add
* fused_linear_param_grad_add
* polish
* add integration test
* polish
* format
* polish
* polish
1 parent 69f11cb, commit 03e64a4
Showing 12 changed files with 472 additions and 37 deletions.
paddle/phi/infermeta/spmd_rules/fused_linear_param_grad_add.cc (77 additions, 0 deletions)
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/infermeta/spmd_rules/fused_linear_param_grad_add.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/infermeta/spmd_rules/matmul.h"
#include "paddle/phi/infermeta/spmd_rules/utils.h"

namespace phi {
namespace distributed {

SpmdInfo FusedLinearParamGradAddInferSpmd(const DistMetaTensor& x,
                                          const DistMetaTensor& dout,
                                          const DistMetaTensor& dweight,
                                          const DistMetaTensor& dbias,
                                          bool multi_precision,
                                          bool has_bias) {
  // The weight grad is mathematically matmul(x^T, dout), so reuse the matmul
  // SPMD rule with trans_x = true to propagate the input placements.
  auto dy_spmd_info =
      MatmulInferSpmd(x, dout, /*trans_x=*/true, /*trans_y=*/false);
  auto& x_dist_attr = PADDLE_GET_CONST(TensorDistAttr, dy_spmd_info.first[0]);
  auto& dout_dist_attr =
      PADDLE_GET_CONST(TensorDistAttr, dy_spmd_info.first[1]);
  auto weight_grad_dist_attr =
      PADDLE_GET_CONST(TensorDistAttr, dy_spmd_info.second[0]);

  // The weight grad keeps only the trailing two dims; shardings on the
  // leading broadcast (batch) dims of the matmul output are reduced away.
  weight_grad_dist_attr = ReduceGradBroadCastDims(2, weight_grad_dist_attr);

  TensorDistAttr dweight_dist_attr = dweight.dist_attr();
  auto dweight_shape = common::vectorize(dweight.dims());
  TensorDistAttr dbias_dist_attr = dbias.dist_attr();
  auto dbias_shape = common::vectorize(dbias.dims());

  // The bias grad is dout reduced over all but its last dim.
  TensorDistAttr bias_grad_dist_attr;
  if (has_bias) {
    bias_grad_dist_attr = ReduceGradBroadCastDims(1, dout.dist_attr());
  }

  // check dweight and weight_grad
  if (!IsEmpty(dweight_shape)) {
    PADDLE_ENFORCE_EQ(dweight_dist_attr,
                      weight_grad_dist_attr,
                      phi::errors::InvalidArgument(
                          "dweight_dist_attr [%s] and weight_grad_dist_attr "
                          "[%s] should be equal",
                          dweight_dist_attr.to_string(),
                          weight_grad_dist_attr.to_string()));
  }
  // check dbias and bias_grad
  if (!IsEmpty(dbias_shape)) {
    PADDLE_ENFORCE_EQ(
        dbias_dist_attr,
        bias_grad_dist_attr,
        phi::errors::InvalidArgument(
            "dbias_dist_attr [%s] and bias_grad_dist_attr [%s] should be equal",
            dbias_dist_attr.to_string(),
            bias_grad_dist_attr.to_string()));
  }

  return {{x_dist_attr, dout_dist_attr, dweight_dist_attr, dbias_dist_attr},
          {weight_grad_dist_attr, bias_grad_dist_attr}};
}

SpmdInfo FusedLinearParamGradAddInferSpmdFakeReverse() { return SpmdInfo(); }

}  // namespace distributed
}  // namespace phi
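For reference, the rule above leans on the identity behind the fused op: the weight gradient of a linear layer is matmul(x^T, dout) with every leading broadcast dimension summed away, and the bias gradient is dout reduced over all but its last dimension. Below is a minimal NumPy sketch of that reference math, an illustration rather than Paddle code, with shapes borrowed from the integration test later in this diff.

import numpy as np

# Illustrative shapes matching the test below: x is [b, s, k], dout is [b, s, n].
b, s, k, n = 4, 16, 32, 64
x = np.random.random((b, s, k)).astype("float32")
dout = np.random.random((b, s, n)).astype("float32")

# dweight = sum over b and s of outer(x[b, s, :], dout[b, s, :]) -> [k, n].
# This is matmul(x^T, dout) with the leading batch dims summed away, which is
# (as we read it) what ReduceGradBroadCastDims(2, ...) expresses on dist attrs:
# a mesh dim that shards a reduced-away axis becomes a partial state instead.
dweight = np.einsum("bsk,bsn->kn", x, dout)

# dbias = dout reduced over everything but its last dim, matching
# ReduceGradBroadCastDims(1, dout.dist_attr()).
dbias = dout.reshape(-1, n).sum(axis=0)

assert dweight.shape == (k, n) and dbias.shape == (n,)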
paddle/phi/infermeta/spmd_rules/fused_linear_param_grad_add.h (32 additions, 0 deletions)
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <vector>

#include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h"
#include "paddle/phi/core/distributed/type_defs.h"

namespace phi {
namespace distributed {

SpmdInfo FusedLinearParamGradAddInferSpmd(const DistMetaTensor& x,
                                          const DistMetaTensor& dout,
                                          const DistMetaTensor& dweight,
                                          const DistMetaTensor& dbias,
                                          bool multi_precision,
                                          bool has_bias);

SpmdInfo FusedLinearParamGradAddInferSpmdFakeReverse();

}  // namespace distributed
}  // namespace phi
test/auto_parallel/hybrid_strategy/semi_auto_parallel_for_fused_linear_param_grad_add.py (106 additions, 0 deletions)
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import numpy as np

import paddle
import paddle.distributed as dist
from paddle import _C_ops


class TestFusedParamGradAddForSemiAutoParallel:
    def __init__(self):
        self._dtype = os.getenv("dtype")
        self._backend = os.getenv("backend")
        self._seed = eval(os.getenv("seed"))
        self._mesh = dist.ProcessMesh([[0, 1], [2, 3]], dim_names=["x", "y"])

    def check_tensor_eq(self, a, b):
        np1 = a.numpy()
        np2 = b.numpy()
        np.testing.assert_allclose(np1, np2, rtol=1e-05, verbose=True)

    def test_body(self):
        x_shape = [4, 16, 32]
        y_shape = [4, 16, 64]

        paddle.seed(self._seed)
        np.random.seed(self._seed)

        x_np = np.random.random(size=x_shape).astype(self._dtype)
        y_np = np.random.random(size=y_shape).astype(self._dtype)

        # Accumulate the gradients twice: the second call adds into the
        # weight_grad/bias_grad produced by the first one.
        def run_acc_step(x, y):
            weight_grad = None
            bias_grad = None
            for _ in range(2):
                weight_grad, bias_grad = _C_ops.fused_linear_param_grad_add(
                    x,
                    y,
                    weight_grad,
                    bias_grad,
                    False,  # multi_precision
                    True,  # has_bias
                )
            return weight_grad, bias_grad

        x = paddle.to_tensor(x_np)
        y = paddle.to_tensor(y_np)
        x.stop_gradient = True
        y.stop_gradient = True

        weight_grad, bias_grad = run_acc_step(x, y)

        # test mp col split
        x_placements = [dist.Shard(0), dist.Replicate()]
        y_placements = [dist.Shard(0), dist.Shard(2)]

        dist_x = dist.shard_tensor(x_np, self._mesh, x_placements)
        dist_y = dist.shard_tensor(y_np, self._mesh, y_placements)
        dist_x.stop_gradient = True
        dist_y.stop_gradient = True

        weight_grad_dist, bias_grad_dist = run_acc_step(dist_x, dist_y)
        self.check_tensor_eq(weight_grad, weight_grad_dist)
        self.check_tensor_eq(bias_grad, bias_grad_dist)

        # test mp row split
        x_placements = [dist.Shard(0), dist.Shard(2)]
        y_placements = [dist.Shard(0), dist.Replicate()]
        dist_x = dist.shard_tensor(x_np, self._mesh, x_placements)
        dist_y = dist.shard_tensor(y_np, self._mesh, y_placements)
        dist_x.stop_gradient = True
        dist_y.stop_gradient = True
        weight_grad_dist, bias_grad_dist = run_acc_step(dist_x, dist_y)
        self.check_tensor_eq(weight_grad, weight_grad_dist)
        self.check_tensor_eq(bias_grad, bias_grad_dist)

    def test_fused_linear_param_grad_add(self):
        self.test_body()

    def run_test_case(self):
        if self._backend == "cpu":
            paddle.set_device("cpu")
        elif self._backend == "gpu":
            paddle.set_device("gpu:" + str(dist.get_rank()))
        else:
            raise ValueError("Only support cpu or gpu backend.")

        self.test_fused_linear_param_grad_add()


if __name__ == '__main__':
    TestFusedParamGradAddForSemiAutoParallel().run_test_case()
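As a smaller single-card sanity check of the same op, the sketch below compares one accumulation step against the NumPy reference from the earlier sketch. It follows the call pattern of run_acc_step above (dweight/dbias start as None, multi_precision=False, has_bias=True) and assumes the fused kernel is available on the current device.

import numpy as np
import paddle
from paddle import _C_ops

x_np = np.random.random((4, 16, 32)).astype("float32")
y_np = np.random.random((4, 16, 64)).astype("float32")
x = paddle.to_tensor(x_np)
y = paddle.to_tensor(y_np)

# One step with no prior accumulation, same argument order as in the test.
weight_grad, bias_grad = _C_ops.fused_linear_param_grad_add(
    x, y, None, None, False, True
)

# Loose float32 tolerance, since the fused kernel may accumulate in a
# different order than einsum.
np.testing.assert_allclose(
    weight_grad.numpy(), np.einsum("bsk,bsn->kn", x_np, y_np), rtol=1e-4
)
np.testing.assert_allclose(
    bias_grad.numpy(), y_np.reshape(-1, 64).sum(axis=0), rtol=1e-4
)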