From 2eec4cd4f5b680d64d7153bbf36c972aff757870 Mon Sep 17 00:00:00 2001 From: Tom Scogland Date: Sat, 31 Aug 2024 10:23:03 -0700 Subject: [PATCH] dfu_impl: replace slow enforce_dfv method problem: In commit b96899b, as part of adding node exclusive, enforce_dfv was added to precisely mark unconstrained resources between the root of the containment tree and the constrained resources. This works fine, but it's a _full traversal of the graph_ that gets run even when selecting a single core. Usually this isn't terrible performance because if the Node type is constrained then all the node vertices get skipped after the constrained one is found. When the job is only constrained on a core or socket however, or if the node isn't found quickly, it can be as bad as a full traversal. Literally 90% of the time spent matching a small single core job in the perf test case in issue #1171. solution: The short version is: Remove enforce_dfv completely. The longer version is that instead of traversing the entire graph, we now re-use the iteration over constrained resources in dfu_impl_t::enforce and walk upward from each constrained vertex in the dominant subsystem until we find a vertex that we've already set best_k (since any above that must already have been set). That way we walk the minimum amount of the tree necessary. Honestly it's still doing more work than it should, because we're still iterating over resources that are children of exclusive resources unnecessarily, and we're having to iterate over `in_edges` rather than directly knowing the parent or subsetting on subsystem, but fixing either of those would be a meaningful refactor so I'm stopping here for now with the 15-20x speedup. fixes #1171 --- resource/traversers/dfu_impl.cpp | 101 +++++++++++++------------------ resource/traversers/dfu_impl.hpp | 7 +-- 2 files changed, 43 insertions(+), 65 deletions(-) diff --git a/resource/traversers/dfu_impl.cpp b/resource/traversers/dfu_impl.cpp index f07c3768c..aa7909876 100644 --- a/resource/traversers/dfu_impl.cpp +++ b/resource/traversers/dfu_impl.cpp @@ -15,6 +15,7 @@ extern "C" { } #include "resource/traversers/dfu_impl.hpp" +#include using namespace Flux::Jobspec; using namespace Flux::resource_model; @@ -42,7 +43,7 @@ void dfu_impl_t::tick () bool dfu_impl_t::in_subsystem (edg_t e, subsystem_t subsystem) const { // Short circuit for single subsystem. This will be a common case. - return !(*m_graph)[e].idata.member_of[subsystem].empty(); + return (*m_graph)[e].subsystem == subsystem; } bool dfu_impl_t::stop_explore (edg_t e, subsystem_t subsystem) const @@ -839,51 +840,6 @@ int dfu_impl_t::dom_find_dfv (std::shared_ptr &w, return rc; } -int dfu_impl_t::enforce_dfv (vtx_t u, scoring_api_t &dfu) -{ - int rc = 0; - subsystem_t dom = m_match->dom_subsystem (); - f_out_edg_iterator_t ei, ei_end; - (*m_graph)[u].idata.colors[dom] = m_color.gray (); - - for (auto &subsystem : m_match->subsystems ()) { - for (tie (ei, ei_end) = out_edges (u, *m_graph); ei != ei_end; ++ei) { - if (stop_explore (*ei, subsystem) || !in_subsystem (*ei, subsystem)) - continue; - vtx_t tgt = target (*ei, *m_graph); - bool tgt_constrained = true; - if (dfu.is_contained (subsystem, (*m_graph)[tgt].type)) { - if (!dfu.best_k (subsystem, (*m_graph)[tgt].type)) - tgt_constrained = false; - } - - if (tgt_constrained) { - // We already decided best_k for the target resource vertex type - // Thus, as soon as we encounter one best_k edge, the ancestor - // edge should be marked best_k. - if ((*m_graph)[*ei].idata.get_trav_token () == m_best_k_cnt) { - rc = 1; - break; - } - } else { - // We have not decided best_k for the target. - // Thus, the subtree must be explored to decide if the ancestor - // edge should be marked best_k. - if (enforce_dfv (tgt, dfu) == 1) { - (*m_graph)[*ei].idata.set_for_trav_update ((*m_graph)[tgt].size, - false, - m_best_k_cnt); - rc = 1; - continue; - } - } - } - } - (*m_graph)[u].idata.colors[dom] = m_color.black (); - - return rc; -} - int dfu_impl_t::has_root (vtx_t root, std::vector &resources, scoring_api_t &dfu, @@ -928,19 +884,10 @@ int dfu_impl_t::enforce_constrained (scoring_api_t &dfu) { int rc = 0; for (auto subsystem : m_match->subsystems ()) - rc += enforce (subsystem, dfu); + rc += enforce (subsystem, dfu, true); return rc; } -int dfu_impl_t::enforce_remaining (vtx_t root, scoring_api_t &dfu) -{ - int rc = 0; - m_color.reset (); - rc = enforce_dfv (root, dfu); - m_color.reset (); - return (rc < 0) ? -1 : 0; -} - int dfu_impl_t::resolve_graph (vtx_t root, std::vector &resources, scoring_api_t &dfu, @@ -957,8 +904,7 @@ int dfu_impl_t::resolve_graph (vtx_t root, goto done; if (enforce_constrained (dfu) != 0) goto done; - if ((rc = enforce_remaining (root, dfu)) != 0) - goto done; + rc = 0; done: return rc; } @@ -977,11 +923,21 @@ int dfu_impl_t::resolve (scoring_api_t &dfu, scoring_api_t &to_parent) return rc; } -int dfu_impl_t::enforce (subsystem_t subsystem, scoring_api_t &dfu) +std::optional find_parent_edge (vtx_t v, resource_graph_t &g, subsystem_t s) +{ + for (auto [ei, e_end] = in_edges (v, g); ei != e_end; ++ei) { + if (g[*ei].subsystem == s) + return *ei; + } + return std::nullopt; +} + +int dfu_impl_t::enforce (subsystem_t subsystem, scoring_api_t &dfu, bool enforce_unconstrained) { int rc = 0; try { std::vector resource_types; + subsystem_t dom = m_match->dom_subsystem (); dfu.resrc_types (subsystem, resource_types); for (auto &t : resource_types) { if (!dfu.best_k (subsystem, t)) @@ -996,6 +952,33 @@ int dfu_impl_t::enforce (subsystem_t subsystem, scoring_api_t &dfu) (*m_graph)[e.edge].idata.set_for_trav_update (e.needs, e.exclusive, m_best_k_cnt); + // we need to resolve unconstrained resources up to the root in the dominant + // subsystem + if (enforce_unconstrained && subsystem == dom) { + // there can only be one in-edge in this subsystem, so find it + vtx_t parent_v = source (e.edge, *m_graph); + std::optional parent_e; + // only the root doesn't have one, so natural stop + while ((parent_e = find_parent_edge (parent_v, *m_graph, subsystem))) { + // if this parent is already marked, all of its will be too + if ((*m_graph)[*parent_e].idata.get_trav_token () == m_best_k_cnt) { + break; + } + parent_v = source (*parent_e, *m_graph); + vtx_t tgt = target (*parent_e, (*m_graph)); + bool tgt_constrained = true; + if (dfu.is_contained (subsystem, (*m_graph)[tgt].type)) { + if (!dfu.best_k (subsystem, (*m_graph)[tgt].type)) + tgt_constrained = false; + } + if (tgt_constrained) { + continue; + } + (*m_graph)[*parent_e].idata.set_for_trav_update ((*m_graph)[tgt].size, + false, + m_best_k_cnt); + } + } } } } diff --git a/resource/traversers/dfu_impl.hpp b/resource/traversers/dfu_impl.hpp index 2161e9bbd..8ff1a0f51 100644 --- a/resource/traversers/dfu_impl.hpp +++ b/resource/traversers/dfu_impl.hpp @@ -11,8 +11,6 @@ #ifndef DFU_TRAVERSE_IMPL_HPP #define DFU_TRAVERSE_IMPL_HPP -#include -#include #include #include #include @@ -27,7 +25,6 @@ #include "resource/writers/match_writers.hpp" #include "resource/store/resource_graph_store.hpp" #include "resource/readers/resource_reader_base.hpp" -#include "resource/planner/c/planner.h" namespace Flux { namespace resource_model { @@ -488,10 +485,8 @@ class dfu_impl_t { bool excl, unsigned int *needs); int resolve (scoring_api_t &dfu, scoring_api_t &to_parent); - int enforce (subsystem_t subsystem, scoring_api_t &dfu); + int enforce (subsystem_t subsystem, scoring_api_t &dfu, bool enforce_unconstrained=false); int enforce_constrained (scoring_api_t &dfu); - int enforce_dfv (vtx_t u, scoring_api_t &dfu); - int enforce_remaining (vtx_t u, scoring_api_t &dfu); /************************************************************************ * *