Skip to content

Commit

Permalink
traverser: add partial cancellation functionality
Browse files Browse the repository at this point in the history
Problem: Fluxion issue
flux-framework#1151 and flux-core
issue flux-framework/flux-core#4312
identified the need for partial release of resources. The current
functional requirement is to release all resources managed by a single
broker rank. In the future, support for releasing arbitrary subgraphs
will be needed for cloud and converged use cases.

Modify the rem_* traverser functions to take a modification type and
type_to_count unordered_map. Add logic in the recursive job
modification calls to distinguish between a full and partial job
cancellation and issue corresponding planner interface calls, handling
errors as needed.
  • Loading branch information
milroy committed Jun 28, 2024
1 parent c914759 commit 55a767e
Show file tree
Hide file tree
Showing 4 changed files with 300 additions and 64 deletions.
18 changes: 18 additions & 0 deletions resource/traversers/dfu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,24 @@ int dfu_traverser_t::remove (int64_t jobid)
return detail::dfu_impl_t::remove (root, jobid);
}

/* Partial-cancel entry point: validate the traverser state, look up the
 * root vertex of the dominant subsystem and forward the request to
 * detail::dfu_impl_t::remove.
 *
 * \param R_to_cancel  deallocation string forwarded unchanged to the
 *                     implementation.
 * \param reader       reader object forwarded unchanged to the
 *                     implementation.
 * \param jobid        job id.
 * \param full_cancel  forwarded by reference to the implementation.
 * \return             the implementation's return value, or -1 with
 *                     errno set to EINVAL when the graph, the graph
 *                     database or the match callback is not set, or when
 *                     the graph database has no root registered for the
 *                     dominant subsystem.
 */
int dfu_traverser_t::remove (const std::string &R_to_cancel,
                             std::shared_ptr<resource_reader_base_t> &reader,
                             int64_t jobid, bool &full_cancel)
{
    // Every handle must be validated *before* it is dereferenced; in
    // particular the match callback has to be checked before it is asked
    // for the dominant subsystem.
    if (!get_graph () || !get_graph_db () || !get_match_cb ()) {
        errno = EINVAL;
        return -1;
    }

    const subsystem_t &dom = get_match_cb ()->dom_subsystem ();
    if (get_graph_db ()->metadata.roots.find (dom)
        == get_graph_db ()->metadata.roots.end ()) {
        // No root vertex is registered for the dominant subsystem.
        errno = EINVAL;
        return -1;
    }

    vtx_t root = get_graph_db ()->metadata.roots.at (dom);
    return detail::dfu_impl_t::remove (root, R_to_cancel, reader, jobid,
                                       full_cancel);
}

int dfu_traverser_t::mark (const std::string &root_path,
resource_pool_t::status_t status)
{
Expand Down
16 changes: 16 additions & 0 deletions resource/traversers/dfu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,22 @@ class dfu_traverser_t : protected detail::dfu_impl_t
*/
int remove (int64_t jobid);

/*! Remove the allocation/reservation referred to by jobid and update
* the resource state. This overload additionally takes a deallocation
* string and a reader, and reports through full_cancel whether the
* partial cancel ended up cancelling all of the job's resources.
*
* \param to_cancel deallocation string such as written in JGF.
* \param reader reader object that deserializes to_cancel to update
* the graph.
* \param jobid job id.
* \param full_cancel bool indicating if the partial cancel cancelled all
* job resources.
* \return 0 on success; -1 on error.
* EINVAL: graph, roots or match callback not set.
*/
int remove (const std::string &to_cancel,
std::shared_ptr<resource_reader_base_t> &reader,
int64_t jobid, bool &full_cancel);

/*! Mark the resource status up|down|etc starting at subtree_root.
*
* \param root_path path to the root of the subtree to update.
Expand Down
36 changes: 28 additions & 8 deletions resource/traversers/dfu_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,21 @@ class dfu_impl_t {
*/
int remove (vtx_t root, int64_t jobid);

/*! Remove the allocation/reservation referred to by jobid and update
* the resource state. This overload additionally takes a deallocation
* string and a reader, and reports through full_cancel whether the
* partial cancel ended up cancelling all of the job's resources.
*
* \param root root resource vertex.
* \param to_cancel deallocation string such as written in JGF.
* \param reader reader object that deserializes to_cancel to update the
* graph.
* \param jobid job id.
* \param full_cancel bool indicating if the partial cancel cancelled all
* job resources.
* \return 0 on success; -1 on error.
*/
int remove (vtx_t root, const std::string &to_cancel,
std::shared_ptr<resource_reader_base_t> &reader,
int64_t jobid, bool &full_cancel);

/*! Update the resource status to up|down|etc starting at subtree_root.
*
* \param root_path path to the root of the subtree to update.
Expand Down Expand Up @@ -474,14 +489,19 @@ class dfu_impl_t {
unsigned int needs, bool excl, const jobmeta_t &jobmeta,
bool full, std::map<std::string, int64_t> &to_parent,
bool emit_shadow);

// Private helpers for removing or modifying a job's footprint on the graph.
// NOTE(review): this listing comes from a diff hunk whose +/- markers were
// lost. The eight jobid-only declarations directly below (rem_txfilter ..
// rem_tag) appear to be the pre-change versions that the commit replaces
// with the modify_data_t-taking declarations that follow -- confirm against
// the actual header before relying on both sets existing.
int rem_txfilter (vtx_t u, int64_t jobid, bool &stop);
int rem_agfilter (vtx_t u, int64_t jobid, const std::string &s);
int rem_idata (vtx_t u, int64_t jobid, const std::string &s, bool &stop);
int rem_plan (vtx_t u, int64_t jobid);
int rem_upv (vtx_t u, int64_t jobid);
int rem_dfv (vtx_t u, int64_t jobid);
int rem_exv (int64_t jobid);
bool rem_tag (vtx_t u, int64_t jobid);
// Declarations that additionally take a modify_data_t. mod_plan, mod_dfv
// and cancel_vertex receive it by non-const reference, the others by const
// reference.
int rem_exclusive_filter (vtx_t u, int64_t jobid,
const modify_data_t &mod_data);
int mod_agfilter (vtx_t u, int64_t jobid, const std::string &s,
const modify_data_t &mod_data, bool &stop);
int mod_idata (vtx_t u, int64_t jobid, const std::string &s,
const modify_data_t &mod_data, bool &stop);
int mod_plan (vtx_t u, int64_t jobid, modify_data_t &mod_data);
int mod_upv (vtx_t u, int64_t jobid, const modify_data_t &mod_data);
int mod_dfv (vtx_t u, int64_t jobid, modify_data_t &mod_data);
int mod_exv (int64_t jobid, const modify_data_t &mod_data);
int cancel_vertex (vtx_t vtx, modify_data_t &mod_data,
int64_t jobid);


/************************************************************************
Expand Down
Loading

0 comments on commit 55a767e

Please sign in to comment.