-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Dist transpiler support prefetch #9714
Changes from all commits
edcfcad
66ab88a
29174df
54656a1
171560b
3605922
d3f2d4c
6973bcb
38ed3e8
eb31b66
b4e974a
d672592
2e69b77
3ad3eea
a07a063
53d6459
e0fca82
b1e398d
cf9d25f
064a913
f81d6b3
f467b18
4b8189f
d1c8f4b
9d3ecca
dff691c
bb27df1
356b9e6
2f4962d
e2674e8
063a956
fde5445
8eea574
193be56
4554e7b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and | |
limitations under the License. */ | ||
|
||
#include <ostream> | ||
#include <thread> | ||
#include <thread> // NOLINT | ||
#include <vector> | ||
|
||
#include "paddle/fluid/operators/listen_and_serv_op.h" | ||
|
||
|
@@ -88,27 +89,35 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, | |
|
||
auto ins = Inputs("X"); | ||
auto fan_in = Attr<int>("Fanin"); | ||
auto *block = Attr<framework::BlockDesc *>(kOptimizeBlock); | ||
auto *program = block->Program(); | ||
auto *optimize_block = Attr<framework::BlockDesc *>(kOptimizeBlock); | ||
auto *prefetch_block = Attr<framework::BlockDesc *>(kPrefetchBlock); | ||
auto *program = optimize_block->Program(); | ||
size_t num_blocks = program->Size(); | ||
PADDLE_ENFORCE_GE(num_blocks, 2, | ||
"server program should have at least 2 blocks"); | ||
|
||
framework::Executor executor(dev_place); | ||
std::vector<int> block_list; | ||
for (size_t blkid = 1; blkid < num_blocks; ++blkid) { | ||
block_list.push_back(blkid); | ||
if (blkid != prefetch_block->ID()) { | ||
block_list.push_back(blkid); | ||
} | ||
} | ||
auto prepared = executor.Prepare(*program, block_list); | ||
auto optimize_prepared = executor.Prepare(*program, block_list); | ||
// Insert placeholder for block0 which holds current op itself. | ||
prepared.insert(prepared.begin(), | ||
std::shared_ptr<framework::ExecutorPrepareContext>(nullptr)); | ||
optimize_prepared.insert( | ||
optimize_prepared.begin(), | ||
std::shared_ptr<framework::ExecutorPrepareContext>(nullptr)); | ||
|
||
rpc_service_->SetScope(&recv_scope); | ||
rpc_service_->SetDevCtx(&dev_ctx); | ||
// TODO(qiao) set proper fields for table lookup and update | ||
rpc_service_->SetExecutor(&executor); | ||
rpc_service_->SetPrefetchBlkdId(0); | ||
VLOG(3) << "prefetch block id is " << prefetch_block->ID(); | ||
auto prefetch_prepared = executor.Prepare(*program, prefetch_block->ID()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code at L106 has already prepared all the blocks, so we don't need to prepare the prefetch_block again. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I skipped the prefetch_block here https://github.com/PaddlePaddle/Paddle/pull/9714/files#diff-64ee97d744659db61dc8ae72bfc103b5R102 |
||
rpc_service_->SetPrefetchBlkdId(prefetch_block->ID()); | ||
rpc_service_->SetPrefetchPreparedCtx(prefetch_prepared.get()); | ||
prefetch_prepared.release(); | ||
rpc_service_->SetProgram(program); | ||
// start the server listening after all member initialized. | ||
server_thread_.reset(new std::thread(RunServer, rpc_service_)); | ||
|
@@ -166,16 +175,18 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, | |
parallel_blkids.push_back(1); | ||
double ts = detail::GetTimestamp(); | ||
for (size_t blkid = 2; blkid < num_blocks; ++blkid) { | ||
if (program->Block(blkid).Parent() != last_parent_blkid) { | ||
ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program, | ||
&recv_scope); | ||
parallel_blkids.clear(); | ||
last_parent_blkid = program->Block(blkid).Parent(); | ||
if (blkid != prefetch_block->ID()) { | ||
if (program->Block(blkid).Parent() != last_parent_blkid) { | ||
ParallelExecuteBlocks(parallel_blkids, &executor, optimize_prepared, | ||
program, &recv_scope); | ||
parallel_blkids.clear(); | ||
last_parent_blkid = program->Block(blkid).Parent(); | ||
} | ||
parallel_blkids.push_back(blkid); | ||
} | ||
parallel_blkids.push_back(blkid); | ||
} | ||
ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program, | ||
&recv_scope); | ||
ParallelExecuteBlocks(parallel_blkids, &executor, optimize_prepared, | ||
program, &recv_scope); | ||
VLOG(2) << "run all blocks spent " << detail::GetTimestamp() - ts << "(ms)"; | ||
|
||
// Reset the received sparse variables, the sum operator would not | ||
|
@@ -211,6 +222,8 @@ from send_op and send back variables to recv_op. | |
.AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); | ||
AddAttr<framework::BlockDesc *>(kOptimizeBlock, | ||
"BlockID to run on server side."); | ||
AddAttr<framework::BlockDesc *>(kPrefetchBlock, | ||
"prefetch block to run on server side."); | ||
AddAttr<int>("Fanin", "How many clients send to this server.") | ||
.SetDefault(1); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#include <future> | ||
#include <future> // NOLINT | ||
#include <ostream> | ||
|
||
#include "paddle/fluid/framework/data_type.h" | ||
|
@@ -50,8 +50,8 @@ class PrefetchOp : public framework::OperatorBase { | |
|
||
for (size_t i = 0; i < ins.size(); i++) { | ||
if (NeedSend(scope, ins[i])) { | ||
VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << "to get " | ||
<< outs[i] << "back"; | ||
VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << " to get " | ||
<< outs[i] << " back"; | ||
rpc_client->AsyncPrefetchVariable(epmap[i], ctx, scope, ins[i], | ||
outs[i]); | ||
} else { | ||
|
@@ -71,7 +71,7 @@ class PrefetchOpMaker : public framework::OpProtoAndCheckerMaker { | |
"(RPCClient) The RPC client object which will be" | ||
"initialized at most once."); | ||
AddOutput("Out", | ||
"(SelectedRows) result " | ||
"(LoDTensor) result " | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the type of Output variable is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here it should be LoDTensor, because the following op is not certain — most ops can only process LoDTensor; SelectedRows is constructed during the backward pass. |
||
"to be fetched from parameter server") | ||
.AsDuplicable(); | ||
AddAttr<std::vector<std::string>>( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to prepare all the blocks of the program, so maybe the name
prepared
is more suitable? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
optimize_prepared is used to be different with prefetch_prepared