Skip to content

Commit

Permalink
#2201: loosen strict inequalities for criterion; remove epsilon from computeWork
Browse files Browse the repository at this point in the history
  • Loading branch information
cwschilly authored and cz4rs committed Sep 20, 2024
1 parent 23bd9c3 commit 45518fd
Show file tree
Hide file tree
Showing 6 changed files with 370 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/vt/vrt/collection/balance/temperedlb/criterion.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ struct GrapevineCriterion {

// Criterion functor from temperedlb/criterion.h. Its call operator compares an
// object's load against the gap between two other loads.
// NOTE(review): presumably used to decide whether an object may be transferred
// from an overloaded rank to an underloaded one -- confirm against the callers.
//
// NOTE(review): this text is a rendered diff (1 addition & 1 deletion), so the
// two `return` lines below are the pre-change and post-change versions of the
// same statement. Only one of them exists in the real file at any revision.
struct ModifiedGrapevineCriterion {
// Takes three named loads (over, under, obj); the fourth LoadType argument is
// unnamed and ignored. Returns the result of comparing obj with over - under.
bool operator()(LoadType over, LoadType under, LoadType obj, LoadType) const {
// Pre-change line: strict inequality, rejects obj == over - under.
return obj < over - under;
// Post-change line: non-strict inequality, also accepts obj == over - under
// (the commit title describes this as loosening the strict inequalities).
return obj <= over - under;
}
};

Expand Down
18 changes: 9 additions & 9 deletions src/vt/vrt/collection/balance/temperedlb/temperedlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ Default: Original
{
"ordering",
R"(
Values: {Arbitrary, ElmID, FewestMigrations, SmallObject, LargestObjects}
Values: {Arbitrary, ElmID, FewestMigrations, SmallObjects, LargestObjects}
Default: FewestMigrations
Description:
The order in which local objects are considered for transfer. Options are:
Expand Down Expand Up @@ -860,8 +860,8 @@ double TemperedLB::computeWork(
alpha * load +
beta * inter_comm_bytes +
gamma * intra_comm_bytes +
delta * shared_comm_bytes +
epsilon;
delta * shared_comm_bytes;
// epsilon;
}

WorkBreakdown TemperedLB::computeWorkBreakdown(
Expand Down Expand Up @@ -1894,7 +1894,7 @@ std::vector<TemperedLB::ObjIDType> TemperedLB::orderObjects(
auto single_obj_load = this_new_load;
for (auto &obj : cur_objs) {
auto obj_load = obj.second;
if (obj_load > over_avg && obj_load < single_obj_load) {
if (obj_load >= over_avg && obj_load < single_obj_load) {
single_obj_load = obj_load;
}
}
Expand Down Expand Up @@ -2340,7 +2340,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr<LockedInfoMsg> msg) {
try_rank, try_info, try_total_bytes, try_max_owm, try_max_osm,
src_cluster, empty_cluster
);
if (c_try > 0.0) {
if (c_try >= 0.0) {
if (c_try > best_c_try) {
best_c_try = c_try;
best_swap = std::make_tuple(src_shared_id, no_shared_id);
Expand All @@ -2358,7 +2358,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr<LockedInfoMsg> msg) {
"testing a possible swap (rank {}): {} {} c_try={}\n",
try_rank, src_shared_id, try_shared_id, c_try
);
if (c_try > 0.0) {
if (c_try >= 0.0) {
if (c_try > best_c_try) {
best_c_try = c_try;
best_swap = std::make_tuple(src_shared_id, try_shared_id);
Expand All @@ -2367,7 +2367,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr<LockedInfoMsg> msg) {
}
}

if (best_c_try > 0) {
if (best_c_try >= 0) {
// FIXME C++20: use structured binding
auto const src_shared_id = std::get<0>(best_swap);
auto const try_shared_id = std::get<1>(best_swap);
Expand Down Expand Up @@ -2653,7 +2653,7 @@ void TemperedLB::swapClusters() {
{
ClusterInfo empty_cluster;
double c_try = criterion(try_rank, try_mem, src_cluster, empty_cluster);
if (c_try > 0.0) {
if (c_try >= 0.0) {
// Try to obtain lock for feasible swap
found_potential_good_swap = true;
proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, c_try);
Expand All @@ -2665,7 +2665,7 @@ void TemperedLB::swapClusters() {
for (auto const& [try_shared_id, try_cluster] : try_clusters) {
// Decide whether swap is beneficial
double c_try = criterion(try_rank, try_mem, src_cluster, try_cluster);
if (c_try > 0.0) {
if (c_try >= 0.0) {
// Try to obtain lock for feasible swap
found_potential_good_swap = true;
proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, c_try);
Expand Down
160 changes: 159 additions & 1 deletion tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json
Original file line number Diff line number Diff line change
@@ -1 +1,159 @@
{"metadata":{"type":"LBDatafile","rank":0},"phases":[{"id":0,"tasks":[{"entity":{"home":0,"id":1,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":3,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":2,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":0,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":1.0,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":5},"messages":1,"from":{"type":"object","id":0},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":4},"messages":1,"from":{"type":"object","id":1},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":2},"messages":1,"from":{"type":"object","id":3},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":3},"bytes":0.5}]}]}
{
"metadata": {
"rank": 0,
"type": "LBDatafile"
},
"phases": [
{
"communications": [
{
"bytes": 2.0,
"from": {
"collection_id": 7,
"home": 0,
"seq_id": 0,
"migratable": true,
"type": "object"
},
"messages": 1,
"to": {
"collection_id": 7,
"home": 1,
"seq_id": 5,
"migratable": true,
"type": "object"
},
"type": "SendRecv"
},
{
"bytes": 1.0,
"from": {
"collection_id": 7,
"home": 0,
"seq_id": 1,
"migratable": true,
"type": "object"
},
"messages": 1,
"to": {
"collection_id": 7,
"home": 1,
"seq_id": 4,
"migratable": true,
"type": "object"
},
"type": "SendRecv"
},
{
"bytes": 1.0,
"from": {
"collection_id": 7,
"home": 0,
"seq_id": 3,
"migratable": true,
"type": "object"
},
"messages": 1,
"to": {
"collection_id": 7,
"home": 0,
"seq_id": 2,
"migratable": true,
"type": "object"
},
"type": "SendRecv"
},
{
"bytes": 0.5,
"from": {
"collection_id": 7,
"home": 0,
"seq_id": 3,
"migratable": true,
"type": "object"
},
"messages": 1,
"to": {
"collection_id": 7,
"home": 2,
"seq_id": 8,
"migratable": true,
"type": "object"
},
"type": "SendRecv"
}
],
"id": 0,
"tasks": [
{
"entity": {
"collection_id": 7,
"home": 0,
"seq_id": 1,
"migratable": true,
"type": "object"
},
"node": 0,
"resource": "cpu",
"time": 0.5,
"user_defined": {
"home_rank": 0,
"shared_bytes": 9.0,
"shared_id": 0
}
},
{
"entity": {
"collection_id": 7,
"home": 0,
"seq_id": 3,
"migratable": true,
"type": "object"
},
"node": 0,
"resource": "cpu",
"time": 0.5,
"user_defined": {
"home_rank": 0,
"shared_bytes": 9.0,
"shared_id": 1
}
},
{
"entity": {
"collection_id": 7,
"home": 0,
"seq_id": 2,
"migratable": true,
"type": "object"
},
"node": 0,
"resource": "cpu",
"time": 0.5,
"user_defined": {
"home_rank": 0,
"shared_bytes": 9.0,
"shared_id": 1
}
},
{
"entity": {
"collection_id": 7,
"home": 0,
"seq_id": 0,
"migratable": true,
"type": "object"
},
"node": 0,
"resource": "cpu",
"time": 1.0,
"user_defined": {
"home_rank": 0,
"shared_bytes": 9.0,
"shared_id": 0
}
}
]
}
]
}
141 changes: 140 additions & 1 deletion tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json
Original file line number Diff line number Diff line change
@@ -1 +1,140 @@
{"metadata":{"type":"LBDatafile","rank":1},"phases":[{"id":0,"tasks":[{"entity":{"home":1,"id":5,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":2.0,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":4,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":7,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":6,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":1.0,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":1},"messages":1,"from":{"type":"object","id":4},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":5},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":7},"bytes":1.0}]}]}
{
"metadata": {
"rank": 1,
"type": "LBDatafile"
},
"phases": [
{
"communications": [
{
"bytes": 2.0,
"from": {
"collection_id": 7,
"home": 1,
"seq_id": 4,
"migratable": true,
"type": "object"
},
"messages": 1,
"to": {
"collection_id": 7,
"home": 0,
"seq_id": 1,
"migratable": true,
"type": "object"
},
"type": "SendRecv"
},
{
"bytes": 2.0,
"from": {
"collection_id": 7,
"home": 1,
"seq_id": 5,
"migratable": true,
"type": "object"
},
"messages": 1,
"to": {
"collection_id": 7,
"home": 2,
"seq_id": 8,
"migratable": true,
"type": "object"
},
"type": "SendRecv"
},
{
"bytes": 1.0,
"from": {
"collection_id": 7,
"home": 1,
"seq_id": 7,
"migratable": true,
"type": "object"
},
"messages": 1,
"to": {
"collection_id": 7,
"home": 1,
"seq_id": 6,
"migratable": true,
"type": "object"
},
"type": "SendRecv"
}
],
"id": 0,
"tasks": [
{
"entity": {
"collection_id": 7,
"home": 1,
"seq_id": 5,
"migratable": true,
"type": "object"
},
"node": 1,
"resource": "cpu",
"time": 2.0,
"user_defined": {
"home_rank": 1,
"shared_bytes": 9.0,
"shared_id": 2
}
},
{
"entity": {
"collection_id": 7,
"home": 1,
"seq_id": 4,
"migratable": true,
"type": "object"
},
"node": 1,
"resource": "cpu",
"time": 0.5,
"user_defined": {
"home_rank": 1,
"shared_bytes": 9.0,
"shared_id": 2
}
},
{
"entity": {
"collection_id": 7,
"home": 1,
"seq_id": 7,
"migratable": true,
"type": "object"
},
"node": 1,
"resource": "cpu",
"time": 0.5,
"user_defined": {
"home_rank": 1,
"shared_bytes": 9.0,
"shared_id": 3
}
},
{
"entity": {
"collection_id": 7,
"home": 1,
"seq_id": 6,
"migratable": true,
"type": "object"
},
"node": 1,
"resource": "cpu",
"time": 1.0,
"user_defined": {
"home_rank": 1,
"shared_bytes": 9.0,
"shared_id": 3
}
}
]
}
]
}
Loading

0 comments on commit 45518fd

Please sign in to comment.