Skip to content

Commit

Permalink
dfu_impl: replace slow enforce_dfv method
Browse files Browse the repository at this point in the history
problem: In commit b96899b, as part of adding node exclusive,
enforce_dfv was added to precisely mark unconstrained resources between
the root of the containment tree and the constrained resources.  This
works fine, but it's a _full traversal of the graph_ that gets run even
when selecting a single core.  Usually this isn't terrible performance
because if the Node type is constrained then all the node vertices get
skipped after the constrained one is found. When the job is only
constrained on a core or socket however, or if the node isn't found
quickly, it can be as bad as a full traversal.  Literally 90% of the
time spent matching a small single core job in the perf test case in
issue #1171.

solution: The short version is: Remove enforce_dfv completely.  The
longer version is that instead of traversing the entire graph, we now
re-use the iteration over constrained resources in dfu_impl_t::enforce
and walk upward from each constrained vertex in the dominant subsystem
until we find a vertex that we've already set best_k (since any above
that must already have been set).  That way we walk the minimum amount
of the tree necessary.  Honestly it's still doing more work than it
should, because we're still iterating over resources that are children
of exclusive resources unnecessarily, and we're having to iterate over
`in_edges` rather than directly knowing the parent or subsetting on
subsystem, but fixing either of those would be a meaningful refactor so
I'm stopping here for now with the 15-20x speedup.

fixes #1171
  • Loading branch information
trws committed Aug 31, 2024
1 parent 0c55695 commit 2eec4cd
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 65 deletions.
101 changes: 42 additions & 59 deletions resource/traversers/dfu_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ extern "C" {
}

#include "resource/traversers/dfu_impl.hpp"
#include <boost/iterator/transform_iterator.hpp>

using namespace Flux::Jobspec;
using namespace Flux::resource_model;
Expand Down Expand Up @@ -42,7 +43,7 @@ void dfu_impl_t::tick ()
bool dfu_impl_t::in_subsystem (edg_t e, subsystem_t subsystem) const
{
// Short circuit for single subsystem. This will be a common case.
return !(*m_graph)[e].idata.member_of[subsystem].empty();
return (*m_graph)[e].subsystem == subsystem;
}

bool dfu_impl_t::stop_explore (edg_t e, subsystem_t subsystem) const
Expand Down Expand Up @@ -839,51 +840,6 @@ int dfu_impl_t::dom_find_dfv (std::shared_ptr<match_writers_t> &w,
return rc;
}

int dfu_impl_t::enforce_dfv (vtx_t u, scoring_api_t &dfu)
{
int rc = 0;
subsystem_t dom = m_match->dom_subsystem ();
f_out_edg_iterator_t ei, ei_end;
(*m_graph)[u].idata.colors[dom] = m_color.gray ();

for (auto &subsystem : m_match->subsystems ()) {
for (tie (ei, ei_end) = out_edges (u, *m_graph); ei != ei_end; ++ei) {
if (stop_explore (*ei, subsystem) || !in_subsystem (*ei, subsystem))
continue;
vtx_t tgt = target (*ei, *m_graph);
bool tgt_constrained = true;
if (dfu.is_contained (subsystem, (*m_graph)[tgt].type)) {
if (!dfu.best_k (subsystem, (*m_graph)[tgt].type))
tgt_constrained = false;
}

if (tgt_constrained) {
// We already decided best_k for the target resource vertex type
// Thus, as soon as we encounter one best_k edge, the ancestor
// edge should be marked best_k.
if ((*m_graph)[*ei].idata.get_trav_token () == m_best_k_cnt) {
rc = 1;
break;
}
} else {
// We have not decided best_k for the target.
// Thus, the subtree must be explored to decide if the ancestor
// edge should be marked best_k.
if (enforce_dfv (tgt, dfu) == 1) {
(*m_graph)[*ei].idata.set_for_trav_update ((*m_graph)[tgt].size,
false,
m_best_k_cnt);
rc = 1;
continue;
}
}
}
}
(*m_graph)[u].idata.colors[dom] = m_color.black ();

return rc;
}

int dfu_impl_t::has_root (vtx_t root,
std::vector<Resource> &resources,
scoring_api_t &dfu,
Expand Down Expand Up @@ -928,19 +884,10 @@ int dfu_impl_t::enforce_constrained (scoring_api_t &dfu)
{
int rc = 0;
for (auto subsystem : m_match->subsystems ())
rc += enforce (subsystem, dfu);
rc += enforce (subsystem, dfu, true);
return rc;
}

int dfu_impl_t::enforce_remaining (vtx_t root, scoring_api_t &dfu)
{
int rc = 0;
m_color.reset ();
rc = enforce_dfv (root, dfu);
m_color.reset ();
return (rc < 0) ? -1 : 0;
}

int dfu_impl_t::resolve_graph (vtx_t root,
std::vector<Resource> &resources,
scoring_api_t &dfu,
Expand All @@ -957,8 +904,7 @@ int dfu_impl_t::resolve_graph (vtx_t root,
goto done;
if (enforce_constrained (dfu) != 0)
goto done;
if ((rc = enforce_remaining (root, dfu)) != 0)
goto done;
rc = 0;
done:
return rc;
}
Expand All @@ -977,11 +923,21 @@ int dfu_impl_t::resolve (scoring_api_t &dfu, scoring_api_t &to_parent)
return rc;
}

int dfu_impl_t::enforce (subsystem_t subsystem, scoring_api_t &dfu)
std::optional<edg_t> find_parent_edge (vtx_t v, resource_graph_t &g, subsystem_t s)
{
for (auto [ei, e_end] = in_edges (v, g); ei != e_end; ++ei) {
if (g[*ei].subsystem == s)
return *ei;
}
return std::nullopt;
}

int dfu_impl_t::enforce (subsystem_t subsystem, scoring_api_t &dfu, bool enforce_unconstrained)
{
int rc = 0;
try {
std::vector<resource_type_t> resource_types;
subsystem_t dom = m_match->dom_subsystem ();
dfu.resrc_types (subsystem, resource_types);
for (auto &t : resource_types) {
if (!dfu.best_k (subsystem, t))
Expand All @@ -996,6 +952,33 @@ int dfu_impl_t::enforce (subsystem_t subsystem, scoring_api_t &dfu)
(*m_graph)[e.edge].idata.set_for_trav_update (e.needs,
e.exclusive,
m_best_k_cnt);
// we need to resolve unconstrained resources up to the root in the dominant
// subsystem
if (enforce_unconstrained && subsystem == dom) {
// there can only be one in-edge in this subsystem, so find it
vtx_t parent_v = source (e.edge, *m_graph);
std::optional<edg_t> parent_e;
// only the root doesn't have one, so natural stop
while ((parent_e = find_parent_edge (parent_v, *m_graph, subsystem))) {
// if this parent is already marked, all of its will be too
if ((*m_graph)[*parent_e].idata.get_trav_token () == m_best_k_cnt) {
break;
}
parent_v = source (*parent_e, *m_graph);
vtx_t tgt = target (*parent_e, (*m_graph));
bool tgt_constrained = true;
if (dfu.is_contained (subsystem, (*m_graph)[tgt].type)) {
if (!dfu.best_k (subsystem, (*m_graph)[tgt].type))
tgt_constrained = false;
}
if (tgt_constrained) {
continue;
}
(*m_graph)[*parent_e].idata.set_for_trav_update ((*m_graph)[tgt].size,
false,
m_best_k_cnt);
}
}
}
}
}
Expand Down
7 changes: 1 addition & 6 deletions resource/traversers/dfu_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
#ifndef DFU_TRAVERSE_IMPL_HPP
#define DFU_TRAVERSE_IMPL_HPP

#include <iostream>
#include <sstream>
#include <cstdlib>
#include <cstdint>
#include <memory>
Expand All @@ -27,7 +25,6 @@
#include "resource/writers/match_writers.hpp"
#include "resource/store/resource_graph_store.hpp"
#include "resource/readers/resource_reader_base.hpp"
#include "resource/planner/c/planner.h"

namespace Flux {
namespace resource_model {
Expand Down Expand Up @@ -488,10 +485,8 @@ class dfu_impl_t {
bool excl,
unsigned int *needs);
int resolve (scoring_api_t &dfu, scoring_api_t &to_parent);
int enforce (subsystem_t subsystem, scoring_api_t &dfu);
int enforce (subsystem_t subsystem, scoring_api_t &dfu, bool enforce_unconstrained=false);
int enforce_constrained (scoring_api_t &dfu);
int enforce_dfv (vtx_t u, scoring_api_t &dfu);
int enforce_remaining (vtx_t u, scoring_api_t &dfu);

/************************************************************************
* *
Expand Down

0 comments on commit 2eec4cd

Please sign in to comment.