From f5e7b577703ce610526a7ab8a0176f1279b43da5 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Thu, 4 Apr 2024 18:59:30 -0700 Subject: [PATCH 01/24] readers: add partial_cancel function and implementation for rv1 Problem: there are no reader functions for partial job cancellation. Add not-yet-implemented functions for all readers, and a full implementation for RV1exec. For RV1exec, add the capability to unpack and read the ranks to be cancelled from the free R payload. Also add a modify_data_t struct to store the type of modification to be performed on the jobid as well as ranks and types removed. Return the updated modify_data_t struct to the traverser. --- resource/readers/resource_reader_base.hpp | 23 ++++++ resource/readers/resource_reader_grug.cpp | 9 +++ resource/readers/resource_reader_grug.hpp | 15 ++++ resource/readers/resource_reader_hwloc.cpp | 9 +++ resource/readers/resource_reader_hwloc.hpp | 15 ++++ resource/readers/resource_reader_jgf.cpp | 9 +++ resource/readers/resource_reader_jgf.hpp | 15 ++++ resource/readers/resource_reader_rv1exec.cpp | 78 ++++++++++++++++++++ resource/readers/resource_reader_rv1exec.hpp | 24 +++++- 9 files changed, 196 insertions(+), 1 deletion(-) diff --git a/resource/readers/resource_reader_base.hpp b/resource/readers/resource_reader_base.hpp index 02d8483a5..a64aa17a8 100644 --- a/resource/readers/resource_reader_base.hpp +++ b/resource/readers/resource_reader_base.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include "resource/schema/resource_graph.hpp" #include "resource/store/resource_graph_store.hpp" #include "resource/readers/resource_namespace_remapper.hpp" @@ -21,6 +22,13 @@ namespace Flux { namespace resource_model { +enum class job_modify_t { CANCEL, PARTIAL_CANCEL, VTX_CANCEL }; + +struct modify_data_t { + job_modify_t mod_type = job_modify_t::PARTIAL_CANCEL; + std::unordered_set ranks_removed; + std::unordered_map type_to_count; +}; /*! Base resource reader class. */ @@ -74,6 +82,21 @@ class resource_reader_base_t { const std::string &str, int64_t jobid, int64_t at, uint64_t dur, bool rsv, uint64_t trav_token) = 0; + /*! Partial cancellation of jobid based on R. + * + * \param g resource graph + * \param m resource graph meta data + * \param mod_data struct containing resource types to counts, mod type, + * and set of ranks removed + * \param R resource set string + * \param jobid jobid of str + * \return 0 on success; non-zero integer on an error + */ + virtual int partial_cancel (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + const std::string &R, int64_t jobid) = 0; + /*! Set the allowlist: only resources that are part of this allowlist * will be unpacked into the graph. 
* diff --git a/resource/readers/resource_reader_grug.cpp b/resource/readers/resource_reader_grug.cpp index 53f320d1e..0114a44de 100644 --- a/resource/readers/resource_reader_grug.cpp +++ b/resource/readers/resource_reader_grug.cpp @@ -496,6 +496,15 @@ int resource_reader_grug_t::update (resource_graph_t &g, return -1; } +int resource_reader_grug_t::partial_cancel (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + const std::string &R, int64_t jobid) +{ + errno = ENOTSUP; // GRUG reader does not support partial cancel + return -1; +} + bool resource_reader_grug_t::is_allowlist_supported () { return false; diff --git a/resource/readers/resource_reader_grug.hpp b/resource/readers/resource_reader_grug.hpp index 0ecf6ebc0..88171160c 100644 --- a/resource/readers/resource_reader_grug.hpp +++ b/resource/readers/resource_reader_grug.hpp @@ -73,6 +73,21 @@ class resource_reader_grug_t : public resource_reader_base_t { const std::string &str, int64_t jobid, int64_t at, uint64_t dur, bool rsv, uint64_t trav_token); + /*! Partial cancellation of jobid based on R. + * + * \param g resource graph + * \param m resource graph meta data + * \param mod_data struct containing resource types to counts, mod type, + * and set of ranks removed + * \param R resource set string + * \param jobid jobid of str + * \return 0 on success; non-zero integer on an error + */ + virtual int partial_cancel (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + const std::string &R, int64_t jobid); + /*! Is the selected reader format support allowlist * * \return false diff --git a/resource/readers/resource_reader_hwloc.cpp b/resource/readers/resource_reader_hwloc.cpp index 0faa75886..d12f2e95e 100644 --- a/resource/readers/resource_reader_hwloc.cpp +++ b/resource/readers/resource_reader_hwloc.cpp @@ -523,6 +523,15 @@ int resource_reader_hwloc_t::update (resource_graph_t &g, return -1; } +int resource_reader_hwloc_t::partial_cancel (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + const std::string &R, int64_t jobid) +{ + errno = ENOTSUP; // hwloc reader does not support partial cancel + return -1; +} + bool resource_reader_hwloc_t::is_allowlist_supported () { return true; diff --git a/resource/readers/resource_reader_hwloc.hpp b/resource/readers/resource_reader_hwloc.hpp index 9817eda41..3e315e562 100644 --- a/resource/readers/resource_reader_hwloc.hpp +++ b/resource/readers/resource_reader_hwloc.hpp @@ -77,6 +77,21 @@ class resource_reader_hwloc_t : public resource_reader_base_t { const std::string &str, int64_t jobid, int64_t at, uint64_t dur, bool rsv, uint64_t trav_token); + /*! Partial cancellation of jobid based on R. + * + * \param g resource graph + * \param m resource graph meta data + * \param mod_data struct containing resource types to counts, mod type, + * and set of ranks removed + * \param R resource set string + * \param jobid jobid of str + * \return 0 on success; non-zero integer on an error + */ + virtual int partial_cancel (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + const std::string &R, int64_t jobid); + /*! 
Is the hwloc reader format support allowlist * * \return true diff --git a/resource/readers/resource_reader_jgf.cpp b/resource/readers/resource_reader_jgf.cpp index 425a350d3..a0bf8deb3 100644 --- a/resource/readers/resource_reader_jgf.cpp +++ b/resource/readers/resource_reader_jgf.cpp @@ -1297,6 +1297,15 @@ int resource_reader_jgf_t::remove_subgraph (resource_graph_t &g, } +int resource_reader_jgf_t::partial_cancel (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + const std::string &R, int64_t jobid) +{ + errno = ENOTSUP; // JGF reader does not support partial cancel + return -1; +} + bool resource_reader_jgf_t::is_allowlist_supported () { return false; diff --git a/resource/readers/resource_reader_jgf.hpp b/resource/readers/resource_reader_jgf.hpp index c44226c34..6836fa429 100644 --- a/resource/readers/resource_reader_jgf.hpp +++ b/resource/readers/resource_reader_jgf.hpp @@ -78,6 +78,21 @@ class resource_reader_jgf_t : public resource_reader_base_t { resource_graph_metadata_t &m, const std::string &path); + /*! Partial cancellation of jobid based on R. + * + * \param g resource graph + * \param m resource graph meta data + * \param mod_data struct containing resource types to counts, mod type, + * and set of ranks removed + * \param R resource set string + * \param jobid jobid of str + * \return 0 on success; non-zero integer on an error + */ + virtual int partial_cancel (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + const std::string &R, int64_t jobid); + /*! Is the selected reader format support allowlist * * \return false diff --git a/resource/readers/resource_reader_rv1exec.cpp b/resource/readers/resource_reader_rv1exec.cpp index f6f8fb1d0..3128ccfb3 100644 --- a/resource/readers/resource_reader_rv1exec.cpp +++ b/resource/readers/resource_reader_rv1exec.cpp @@ -941,6 +941,55 @@ int resource_reader_rv1exec_t::unpack_internal (resource_graph_t &g, return rc; } +int resource_reader_rv1exec_t::partial_cancel_internal (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + json_t *rv1) +{ + int rc = -1; + int version; + int64_t rank; + size_t index; + json_t *rlite = nullptr; + json_t *entry = nullptr; + const char *ranks = nullptr; + struct idset *r_ids = nullptr; + + // Implementing cancellation of rank subgraph + // will require further parsing of nodelist, + // children, and rank + if (json_unpack (rv1, "{s:i s:{s:o}}", + "version", &version, + "execution", + "R_lite", &rlite) < 0) { + errno = EINVAL; + goto error; + } + if (version != 1) { + errno = EINVAL; + goto error; + } + json_array_foreach (rlite, index, entry) { + if (json_unpack (entry, "{s:s}", + "rank", &ranks) < 0) { + errno = EINVAL; + goto error; + } + } + if ( !(r_ids = idset_decode (ranks))) + goto error; + rank = idset_first (r_ids); + while (rank != IDSET_INVALID_ID) { + mod_data.ranks_removed.insert (rank); + rank = idset_next (r_ids, rank); + } + idset_destroy (r_ids); + rc = 0; + +error: + return rc; +} + //////////////////////////////////////////////////////////////////////////////// @@ -1047,6 +1096,35 @@ bool resource_reader_rv1exec_t::is_allowlist_supported () return false; } +int resource_reader_rv1exec_t::partial_cancel (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + const std::string &R, int64_t jobid) +{ + int rc = -1; + json_error_t error; + json_t *rv1 = nullptr; + int saved_errno; + + if (R == "") { + errno = EINVAL; + goto ret; + } + + if ( !(rv1 = json_loads (R.c_str (), 
0, &error))) { + errno = ENOMEM; + goto ret; + } + + rc = partial_cancel_internal (g, m, mod_data, rv1); + +ret: + saved_errno = errno; + json_decref (rv1); + errno = saved_errno; + return rc; +} + /* * vi:tabstop=4 shiftwidth=4 expandtab */ diff --git a/resource/readers/resource_reader_rv1exec.hpp b/resource/readers/resource_reader_rv1exec.hpp index 2caf9ff4c..e0a5184b0 100644 --- a/resource/readers/resource_reader_rv1exec.hpp +++ b/resource/readers/resource_reader_rv1exec.hpp @@ -89,6 +89,21 @@ class resource_reader_rv1exec_t : public resource_reader_base_t { const std::string &str, int64_t jobid, int64_t at, uint64_t dur, bool rsv, uint64_t trav_token); + /*! Partial cancellation of jobid based on R. + * + * \param g resource graph + * \param m resource graph meta data + * \param mod_data struct containing resource types to counts, mod type, + * and set of ranks removed + * \param R resource set string + * \param jobid jobid of str + * \return 0 on success; non-zero integer on an error + */ + virtual int partial_cancel (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + const std::string &R, int64_t jobid); + /*! Is the selected reader format support allowlist * * \return false @@ -131,7 +146,7 @@ class resource_reader_rv1exec_t : public resource_reader_base_t { int add_cluster_vertex (resource_graph_t &g, resource_graph_metadata_t &m); - // Update functions + vtx_t find_vertex (resource_graph_t &g, resource_graph_metadata_t &m, vtx_t parent, int64_t id, const std::string &subsys, @@ -140,6 +155,7 @@ class resource_reader_rv1exec_t : public resource_reader_base_t { const std::string &name, int size, int rank); + // Update functions int update_vertex (resource_graph_t &g, vtx_t vtx, updater_data &update_data); @@ -213,6 +229,12 @@ class resource_reader_rv1exec_t : public resource_reader_base_t { int unpack_internal (resource_graph_t &g, resource_graph_metadata_t &m, json_t *rv1, updater_data &update_data); + + int partial_cancel_internal (resource_graph_t &g, + resource_graph_metadata_t &m, + modify_data_t &mod_data, + json_t *rv1); + }; } // namespace resource_model From 7908bc03c10e07cd8bc45664188867df4b609c2a Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Thu, 4 Apr 2024 19:00:06 -0700 Subject: [PATCH 02/24] traverser: delete job2span entry for removed jobid Problem: the current implementation of rem_agfilter doesn't remove the job2span entry when removing a job. Surprisingly, this hasn't caused test failures or production problems. Add the approrpiate removal of the job2span entry. --- resource/traversers/dfu_impl_update.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/resource/traversers/dfu_impl_update.cpp b/resource/traversers/dfu_impl_update.cpp index b7e346949..81a12b400 100644 --- a/resource/traversers/dfu_impl_update.cpp +++ b/resource/traversers/dfu_impl_update.cpp @@ -408,6 +408,7 @@ int dfu_impl_t::rem_agfilter (vtx_t u, int64_t jobid, m_err_msg += strerror (errno); m_err_msg += "\n"; } + job2span.erase (jobid); done: return rc; From a49764ea2be4b3d525e90a692847d741f11c9211 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Mon, 17 Jun 2024 10:43:54 -0700 Subject: [PATCH 03/24] traverser: fix confusing indentation Problem: the indentation of the check for `full` is confusing. Adjust the indentation and pull the declaration of the planner and span to the top of the function. 
--- resource/traversers/dfu_impl_update.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/resource/traversers/dfu_impl_update.cpp b/resource/traversers/dfu_impl_update.cpp index 81a12b400..60ce38470 100644 --- a/resource/traversers/dfu_impl_update.cpp +++ b/resource/traversers/dfu_impl_update.cpp @@ -150,18 +150,16 @@ int dfu_impl_t::upd_plan (vtx_t u, const subsystem_t &s, unsigned int needs, int &n) { int rc = 0; + int64_t span = -1; + planner_t *plans = NULL; if (excl) { - n++; - if (!full) { + if (!full) { // If not full mode, plan has already been updated, thus return. return 0; } - int64_t span = -1; - planner_t *plans = NULL; - if ( (plans = (*m_graph)[u].schedule.plans) == NULL) { m_err_msg += __FUNCTION__; m_err_msg += ": plans not installed.\n"; From 693a97379255b034b75fba0c0772a0ed6ea760ad Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Thu, 4 Apr 2024 23:47:05 -0700 Subject: [PATCH 04/24] planner interface: add span reduction functionality Problem: the planner currently cannot remove resources from a span. This functionality is necessary for partial cancellation. Add a helper function to reduce the span scheduled quantity. Add an interface function to remove span resources and check if the values to be removed are valid. Special care must be taken for 0 spans or removal of 0 resources. Zero spans are used to track individaul planner resource spans as part of a span of multiple planner resource types tracked by a planner_multi. Add a check for removing 0 resources which can also be used to check if a span is valid. Return an updated bool which indicates whether all span resource have been removed. --- resource/planner/c/planner.h | 18 +++++++ resource/planner/c/planner_c_interface.cpp | 62 ++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/resource/planner/c/planner.h b/resource/planner/c/planner.h index a72aefa13..7615264ee 100644 --- a/resource/planner/c/planner.h +++ b/resource/planner/c/planner.h @@ -210,6 +210,24 @@ int64_t planner_add_span (planner_t *ctx, int64_t start_time, uint64_t duration, */ int planner_rem_span (planner_t *ctx, int64_t span_id); +/*! Reduce the existing span's resources from the planner. + * This function will be called for a partial release/cancel. + * If the number of resources to be removed is equal to those + * allocated to the span, completely remove the span. + * + * \param ctx opaque planner context returned from planner_new. + * \param span_id span_id returned from planner_add_span. + * \param to_remove number of resources to free from the span + * \param removed bool indicating if the entire span was removed. + * \return 0 on success; -1 on an error with errno set as follows: + * EINVAL: invalid argument. + * EKEYREJECTED: span could not be removed from + * the planner's internal data structures. + * ERANGE: a resource state became out of a valid range. + */ +int planner_reduce_span (planner_t *ctx, int64_t span_id, + int64_t to_remove, bool &removed); + //! 
Span iterators -- there is no specific iteration order int64_t planner_span_first (planner_t *ctx); int64_t planner_span_next (planner_t *ctx); diff --git a/resource/planner/c/planner_c_interface.cpp b/resource/planner/c/planner_c_interface.cpp index bfa122393..31d69e12f 100644 --- a/resource/planner/c/planner_c_interface.cpp +++ b/resource/planner/c/planner_c_interface.cpp @@ -124,6 +124,22 @@ static int update_points_subtract_span (planner_t *ctx, return rc; } +static int update_points_partial_subtract_span (planner_t *ctx, + std::list &list, + int64_t to_remove) +{ + for (auto &point : list) { + point->scheduled -= to_remove; + point->remaining += to_remove; + if ( (point->scheduled < 0) + || (point->remaining > ctx->plan->get_total_resources ())) { + errno = ERANGE; + return -1; + } + } + return 0; +} + static bool span_ok (planner_t *ctx, scheduled_point_t *start_point, uint64_t duration, int64_t request) { @@ -583,6 +599,52 @@ extern "C" int planner_rem_span (planner_t *ctx, int64_t span_id) return rc; } +extern "C" int planner_reduce_span (planner_t *ctx, int64_t span_id, + int64_t to_remove, bool &removed) +{ + int rc = -1; + uint64_t duration = 0; + std::map>::iterator it; + + removed = false; + if (!ctx) { + errno = EINVAL; + return -1; + } + it = ctx->plan->get_span_lookup ().find (span_id); + if (it == ctx->plan->get_span_lookup ().end ()) { + errno = EINVAL; + return -1; + } + std::shared_ptr &span = it->second; + // Planned values can be 0 (especially when part of planner_multi), + // and to_remove may be zero as well. We want to remove spans where + // Planned values and to_remove are identically zero. + if (to_remove == span->planned) { + // Removing the whole span + rc = planner_rem_span (ctx, span_id); + removed = true; + } else if (to_remove == 0) { + rc = 0; + } else if (to_remove < span->planned) { + // Removing partial span resources + restore_track_points (ctx); + span->planned -= to_remove; + std::list list; + duration = span->last - span->start; + fetch_overlap_points (ctx, span->start, duration, list); + update_points_partial_subtract_span (ctx, list, to_remove); + update_mintime_resource_tree (ctx, list); + rc = 0; + } else { + // Error + errno = EINVAL; + rc -1; + } + + return rc; +} + extern "C" int64_t planner_span_first (planner_t *ctx) { if (!ctx) { From 1bcdf263a18481c819b66c0220a4c9c0a37b8f41 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Fri, 5 Apr 2024 00:26:43 -0700 Subject: [PATCH 05/24] planner_multi: get random_access index based on resource type Problem: a partial cancellation will accumulate resource counts to be removed in a reader and the traverser. There is no guarantee that the ordering of resource types encountered will be the same as when the aggregate filter planner_multi was initialized. Therefore the vector index of the resource does not provide a reliable mapping between planner_multi resources and those found during the string unpacking and traversal. Add a function to get the multi_index index of a resource type based on the resource name string. 
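The lookup added in this patch is the usual Boost.MultiIndex move of finding the element through the hashed (by-name) index and then measuring its offset in the random-access index. A self-contained analogue is sketched below; the container, index numbers, and field names are invented for illustration and are not the ones used in planner_multi.hpp, and the not-found case is handled with a size() sentinel, matching the bounds check the caller performs later in this series.

#include <cstdint>
#include <iostream>
#include <string>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/random_access_index.hpp>

namespace bmi = boost::multi_index;

struct entry { std::string type; int64_t total; };

// Index 0 preserves insertion order (random access); index 1 hashes by name.
using store_t = bmi::multi_index_container<
    entry,
    bmi::indexed_by<
        bmi::random_access<>,
        bmi::hashed_unique<bmi::member<entry, std::string, &entry::type>>>>;

// Analogue of planner_multi::get_resource_type_idx: type name -> position.
size_t type_idx (const store_t &s, const std::string &type)
{
    auto hit = s.get<1> ().find (type);
    if (hit == s.get<1> ().end ())
        return s.size ();                      // not tracked: sentinel == size
    auto rit = s.project<0> (hit);             // hop back to random-access index
    return static_cast<size_t> (rit - s.begin ());
}

int main ()
{
    store_t s;
    s.push_back ({"core", 64});
    s.push_back ({"memory", 512});
    std::cout << type_idx (s, "memory") << "\n";  // prints 1
    std::cout << type_idx (s, "gpu") << "\n";     // prints 2 (== size, not found)
    return 0;
}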
--- resource/planner/c++/planner_multi.cpp | 7 +++++++ resource/planner/c++/planner_multi.hpp | 1 + 2 files changed, 8 insertions(+) diff --git a/resource/planner/c++/planner_multi.cpp b/resource/planner/c++/planner_multi.cpp index 695af53e4..407378071 100644 --- a/resource/planner/c++/planner_multi.cpp +++ b/resource/planner/c++/planner_multi.cpp @@ -285,6 +285,13 @@ const char *planner_multi::get_resource_type_at (size_t i) const return m_types_totals_planners.at (i).resource_type.c_str (); } +size_t planner_multi::get_resource_type_idx (const char *type) const +{ + auto by_res = m_types_totals_planners.get ().find (type); + auto curr_idx = m_types_totals_planners.get ().iterator_to (*by_res); + return curr_idx - m_types_totals_planners.begin (); +} + struct request_multi &planner_multi::get_iter () { return m_iter; diff --git a/resource/planner/c++/planner_multi.hpp b/resource/planner/c++/planner_multi.hpp index 80639550d..0cfdd71a3 100644 --- a/resource/planner/c++/planner_multi.hpp +++ b/resource/planner/c++/planner_multi.hpp @@ -87,6 +87,7 @@ class planner_multi { int64_t get_resource_total_at (size_t i) const; int64_t get_resource_total_at (const char *type) const; const char *get_resource_type_at (size_t i) const; + size_t get_resource_type_idx (const char *type) const; struct request_multi &get_iter (); // Span lookup functions std::map> &get_span_lookup (); From 40f76d7c9e9e84b1ab6776b16040139c10a3de2b Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Fri, 5 Apr 2024 00:29:21 -0700 Subject: [PATCH 06/24] planner_multi interface: add span partial cancel Problem: partial cancellation requires the capability to remove parts of a planner_multi span. The functionality takes a vector of type names and new counts and must reduce the appropriate planner sub-spans. Add the interface function to provide the require capability. The function iterates through each resource type to be adjusted and maps the name to its index in the planner_multi span. That identifies the sub-planner span index for reduction of the planner resource type. The loop then calls the planner_reduce_span function which performs the resource reduction and returns whether the reduction resulted in a complete removal of the span. The return code of the function could be -1 if either the reduction failed, or if the span no longer exists because it was removed by a previous invocation of planner)reduce_span. Track the removed planner sub-spans by -1 entries. The second loop is required to check for resources not covered by the resource_types function input vector. This can occur when a partial reduction call does not include zero resource spans entered when the planner_multi span was originally created. This can happen when a job requests cores, but the planner_multi agfilter tracks cores and memory. A span will be created for cores and memory, but the memory request will be 0. We need to remove these spans. Finally, check to see if all subspans were removed by counting the number of -1 span entries. 
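A short usage sketch of the new interface follows. It assumes the planner_multi C header from this series is on the include path and the planner library is linked; the counts, types, and times are illustrative. The span is created from a cores-only request, so a zero-count memory sub-span exists alongside it, and the second loop described above is what clears that sub-span and lets the whole span be reported as removed.

#include <cassert>
#include <cstdint>
#include "resource/planner/c/planner_multi.h"

int main ()
{
    const uint64_t totals[] = {64, 512};
    const char *types[] = {"core", "memory"};
    planner_multi_t *ctx = planner_multi_new (0, INT64_MAX, totals, types, 2);

    // Job requested cores only; memory gets an implicit zero-count sub-span.
    const uint64_t request[] = {16, 0};
    int64_t span = planner_multi_add_span (ctx, 0, 3600, request, 2);
    assert (span != -1);

    // Partial cancel releases all 16 cores. The trailing loop clears the
    // zero-count memory sub-span, so the span is removed outright.
    bool removed = false;
    const uint64_t reduce[] = {16};
    const char *reduce_types[] = {"core"};
    int rc = planner_multi_reduce_span (ctx, span, reduce, reduce_types, 1,
                                        removed);
    assert (rc == 0 && removed);
    assert (planner_multi_avail_resources_at (ctx, 0, 0) == 64);

    planner_multi_destroy (&ctx);
    return 0;
}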
--- resource/planner/c/planner_multi.h | 29 +++++ .../planner/c/planner_multi_c_interface.cpp | 111 +++++++++++++++++- 2 files changed, 139 insertions(+), 1 deletion(-) diff --git a/resource/planner/c/planner_multi.h b/resource/planner/c/planner_multi.h index 8b2d1de40..528eda982 100644 --- a/resource/planner/c/planner_multi.h +++ b/resource/planner/c/planner_multi.h @@ -278,6 +278,35 @@ int64_t planner_multi_add_span (planner_multi_t *ctx, int64_t start_time, */ int planner_multi_rem_span (planner_multi_t *ctx, int64_t span_id); +/*! Reduce the existing span's resources from the planner. + * This function will be called for a partial release/cancel. + * If the number of resources to be removed is equal to those + * allocated to the span, completely remove the span. + * + * \param ctx opaque multi-planner context returned + * from planner_multi_new. + * \param span_id span_id returned from planner_add_span. + * \param reduced_totals + * 64-bit unsigned integer array of size len where each + * element contains the total count of available resources + * of a single resource type. + * \param resource_types + * string array of size len where each element contains + * the resource type corresponding to each corresponding + * element in the resource_totals array. + * \param len length of the resource_totals and resource_types arrays. + * \param removed bool indicating if the entire span was removed. + * \return 0 on success; -1 on error with errno set as follows: + * EINVAL: invalid argument. + * EKEYREJECTED: span could not be removed from + * the planner's internal data structures. + * ERANGE: a resource state became out of a valid range. + */ +int planner_multi_reduce_span (planner_multi_t *ctx, int64_t span_id, + const uint64_t *reduced_totals, + const char **resource_types, size_t len, + bool &removed); + //! Span iterators -- there is no specific iteration order // return -1 when you no longer can iterate: EINVAL when ctx is NULL. // ENOENT when you reached the end of the spans diff --git a/resource/planner/c/planner_multi_c_interface.cpp b/resource/planner/c/planner_multi_c_interface.cpp index 1e57cc66b..958a3d900 100644 --- a/resource/planner/c/planner_multi_c_interface.cpp +++ b/resource/planner/c/planner_multi_c_interface.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "planner_multi.h" #include "resource/planner/c++/planner_multi.hpp" @@ -414,6 +414,115 @@ extern "C" int planner_multi_rem_span (planner_multi_t *ctx, int64_t span_id) return rc; } +extern "C" int planner_multi_reduce_span (planner_multi_t *ctx, + int64_t span_id, + const uint64_t *reduced_totals, + const char **resource_types, + size_t len, + bool &removed) +{ + size_t i = 0; + int rc = -1; + bool tmp_removed = false; + size_t mspan_idx; + int64_t mspan_sum = 0; + std::set ext_res_types; + + removed = false; + if (!ctx || span_id < 0 || !reduced_totals || !resource_types) { + errno = EINVAL; + return -1; + } + auto span_it = ctx->plan_multi->get_span_lookup ().find (span_id); + if (span_it == ctx->plan_multi->get_span_lookup ().end ()) { + errno = ENOENT; + return -1; + } + for (i = 0; i < len; ++i) { + if (reduced_totals[i] > + static_cast (std::numeric_limits::max ())) { + errno = ERANGE; + return -1; + } + // Index could be different than the span_lookup due to order of + // iteration in the reader differing from the graph initialization + // order. 
+ mspan_idx = ctx->plan_multi->get_resource_type_idx (resource_types[i]); + // Resource type not found; can happen if agfilter doesn't track resource + if (mspan_idx >= ctx->plan_multi->get_planners_size ()) + continue; + + tmp_removed = false; + if ( (rc = planner_reduce_span ( + ctx->plan_multi->get_planner_at (mspan_idx), + span_it->second.at (mspan_idx), + reduced_totals[i], + tmp_removed)) == -1) { + // Could return -1 if the span with 0 resource request had been removed + // by a previous cancellation, so need to check if the span exists. + if (planner_is_active_span (ctx->plan_multi->get_planner_at (mspan_idx), + span_it->second.at (mspan_idx))) { + // We know the span is valid, so planner_reduce_span + // encountered another error. + errno = EINVAL; + goto error; + } + } + ext_res_types.insert (mspan_idx); + // Enter invalid span ID in the span_lookup to indicate the resource + // removal. + if (tmp_removed) + span_it->second[mspan_idx] = -1; + } + // Iterate over planner_multi resources since resource_types may not cover + // all planner_multi resources. If resource_types contains fewer types + // than the total planner_multi resources, this means the reader partial + // cancel didn't encounter those resource types. This can happen since + // agfilter requests for 0 resources are entered for resource types + // tracked by the agfilter that the job didn't request. Ex: job requests + // cores, but the agfilter tracks cores and memory. A span will be created + // for cores and memory, but the memory request will be 0. We need to + // remove these spans. + for (i = 0; i < ctx->plan_multi->get_planners_size (); ++i) { + tmp_removed = false; + // Check if the resource type was already processed in a previous + // loop. + if (ext_res_types.find (i) == ext_res_types.end ()) { + if ( (rc = planner_reduce_span (ctx->plan_multi->get_planner_at (i), + span_it->second.at (i), + 0, + tmp_removed)) == -1) { + // Could return -1 if the span with 0 resource request had been + // removed by a previous cancellation, so need to check if the + // span exists. + if (planner_is_active_span (ctx->plan_multi->get_planner_at (i), + span_it->second.at (i))) { + // We know the span is valid, so planner_reduce_span + // encountered another error. + errno = EINVAL; + goto error; + } + } + // Enter invalid span ID in the span_lookup to indicate the + // resource removal. + if (tmp_removed) + span_it->second[i] = -1; + } + } + mspan_sum = std::accumulate (span_it->second.begin (), + span_it->second.end (), + 0, std::plus ()); + // Delete if all entries are -1 + if (mspan_sum == (-1 * span_it->second.size ())) { + ctx->plan_multi->get_span_lookup ().erase (span_it); + removed = true; + } + + rc = 0; +error: + return rc; +} + int64_t planner_multi_span_first (planner_multi_t *ctx) { int64_t rc = -1; From fb00e95c66697ee9406ded63899505b4770a48d7 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Sun, 7 Apr 2024 00:52:18 -0700 Subject: [PATCH 07/24] planner: add unit tests for partial cancel Problem: there are no unit tests for planner partial cancel. Add tests to ensure consistency and that a sequence of partial cancels that remove all job resources is identical to a full cancel. 
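The central property these tests check can be stated compactly: partial reductions that eventually free everything a span holds should leave the planner in the same state as one full planner_rem_span. Below is a minimal sketch of that invariant, assuming the planner C header from this series; the quantities are illustrative.

#include <cassert>
#include <cstdint>
#include "resource/planner/c/planner.h"

int main ()
{
    planner_t *a = planner_new (0, INT64_MAX, 100, "core");
    planner_t *b = planner_new (0, INT64_MAX, 100, "core");

    int64_t sa = planner_add_span (a, 0, 600, 40);
    int64_t sb = planner_add_span (b, 0, 600, 40);

    bool removed = false;
    planner_reduce_span (a, sa, 15, removed);   // partial: 25 still allocated
    assert (!removed);
    planner_reduce_span (a, sa, 25, removed);   // remainder: span fully removed
    assert (removed);

    planner_rem_span (b, sb);                   // full cancel in a single step

    // Both planners should now report identical availability.
    assert (planner_avail_resources_at (a, 0) ==
            planner_avail_resources_at (b, 0));

    planner_destroy (&a);
    planner_destroy (&b);
    return 0;
}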
--- resource/planner/test/planner_test01.cpp | 59 +++++++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/resource/planner/test/planner_test01.cpp b/resource/planner/test/planner_test01.cpp index c7077c485..560278abb 100644 --- a/resource/planner/test/planner_test01.cpp +++ b/resource/planner/test/planner_test01.cpp @@ -741,7 +741,7 @@ static int test_update () uint64_t resource4 = 2304; uint64_t resource5 = 468; uint64_t resource6 = 50000; - int64_t avail, avail1 = 0; + int64_t avail = 0, avail1 = 0; const char resource_type[] = "core"; planner_t *ctx = NULL, *ctx2 = NULL; @@ -775,6 +775,10 @@ static int test_update () span = planner_add_span (ctx, 1152000, 57600, resource6); bo = (bo || (planners_equal (ctx, ctx2)) || span != -1); ok (!bo, "reducing resources below request should prevent scheduling"); + span = planner_add_span (ctx, 1152000, 57600, 40000); + //rc = planner_reduce_span (ctx, span, 40000, removed); + //std::cout << "Reduce span rc: " << rc << " removed: " << removed << " errno: " << errno << "\n"; + rc = planner_rem_span (ctx, span); planner_destroy (&ctx); planner_destroy (&ctx2); @@ -782,9 +786,58 @@ static int test_update () return 0; } +static int test_partial_cancel () +{ + int rc; + int64_t span1 = -1, span2 = -1, span3 = -1, span4 = -1; + bool bo = false, removed = false; + uint64_t resource_total = 100; + uint64_t resource1 = 25; + uint64_t resource2 = 75; + int64_t avail1 = 0, avail2 = 0, avail3 = 0; + const char resource_type[] = "core"; + planner_t *ctx = NULL; + + ctx = planner_new (0, INT64_MAX, resource_total, resource_type); + // Add some spans + span1 = planner_add_span (ctx, 0, 600, resource1); + span2 = planner_add_span (ctx, 0, 1200, resource2); + + rc = planner_reduce_span (ctx, span1, 15, removed); + avail1 = planner_avail_resources_at (ctx, 0); + bo = (bo || removed || avail1 != 15); + ok (!bo, "reducing span resources results in expected availability"); + + removed = false; + rc = planner_reduce_span (ctx, span1, 10, removed); + avail1 = planner_avail_resources_at (ctx, 0); + bo = (bo || avail1 != 25 || !removed); + ok (!bo, "reducing all span resources results in expected availability and full removal"); + + removed = false; + rc = planner_reduce_span (ctx, span2, 25, removed); + avail1 = planner_avail_resources_at (ctx, 300); + span3 = planner_add_span (ctx, 300, 300, 50); + avail2 = planner_avail_resources_at (ctx, 300); + bo = (bo || avail1 != 50 || avail2 != 0 || removed); + ok (!bo, "reducing overlapping span enables new span with expected allocation"); + + removed = false; + planner_rem_span (ctx, span2); + rc = planner_reduce_span (ctx, span3, 50, removed); + span3 = planner_add_span (ctx, 300, 300, 100); + avail3 = planner_avail_resources_at (ctx, 300); + bo = (bo || avail3 != 0 || span3 == -1 || rc != 0 || !removed); + ok (!bo, "reducing span to zero removes allocation and allows subsequent full allocation"); + + planner_destroy (&ctx); + + return 0; +} + int main (int argc, char *argv[]) { - plan (67); + plan (71); test_planner_getters (); @@ -810,6 +863,8 @@ int main (int argc, char *argv[]) test_update (); + test_partial_cancel (); + done_testing (); return EXIT_SUCCESS; From 2d8b7b154c0b50a05c8450f09fdee829b17b6c18 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Sun, 7 Apr 2024 02:44:36 -0700 Subject: [PATCH 08/24] planner_multi: add unit tests for partial cancel Problem: there are no unit tests for planner_multi partial cancel. 
Add tests to ensure consistency and that a sequence of partial cancels that remove all job resources is identical to a full cancel. Also ensure resource permutations and zero spans are handled correctly. --- resource/planner/test/planner_test02.cpp | 107 ++++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/resource/planner/test/planner_test02.cpp b/resource/planner/test/planner_test02.cpp index 76b42a420..f6fa54196 100644 --- a/resource/planner/test/planner_test02.cpp +++ b/resource/planner/test/planner_test02.cpp @@ -699,9 +699,112 @@ static int test_multi_update () } +static int test_partial_cancel () +{ + bool bo = false, removed = false, removed1 = false, removed2 = false, + removed3 = false, removed4 = false; + size_t len = 5; + int rc = -1; + int64_t span1 = -1, span2 = -1, span3 = -1, span4 = -1, avail1 = -1, + avail2 = -1, avail3 = -1, avail4 = -1, avail5 = -1, avail6 = -1, + avail7 = -1; + const uint64_t resource_totals[] = {10, 20, 30, 40, 50}; + const char *resource_types[] = {"A", "B", "C", "D", "E"}; + const char *resource_types1[] = {"B", "A", "E"}; + const char *resource_types2[] = {"B", "A", "C", "D", "G", "X", "Y"}; + const char *resource_types3[] = {"C", "D", "A", "B", "E"}; + const char *resource_types4[] = {"D"}; + const char *resource_types5[] = {"A"}; + const char *resource_types6[] = {"B"}; + const uint64_t reduce1[] = {1, 0, 0, 0, 0}; + const uint64_t reduce2[] = {2, 1, 5}; + const uint64_t reduce3[] = {2, 1, 5, 6, 7, 8, 9}; + const uint64_t reduce4[] = {3, 3, 0, 0, 0}; + const uint64_t reduce5[] = {1}; + const uint64_t request1[] = {2, 0, 0, 0, 0}; + const uint64_t request2[] = {1, 2, 3, 4, 5}; + const uint64_t request3[] = {2, 2, 0, 0, 0}; + planner_multi_t *ctx = NULL; + + ctx = planner_multi_new (0, INT64_MAX, resource_totals, resource_types, len); + + span1 = planner_multi_add_span (ctx, 0, 1000, request1, len); + span2 = planner_multi_add_span (ctx, 0, 2000, request2, len); + rc = planner_multi_reduce_span (ctx, span1, reduce1, resource_types, + 5, removed); + avail1 = planner_multi_avail_resources_at (ctx, 0, 0); + bo = (bo || avail1 != 8 || removed || rc != 0); + ok (!bo, "reducing span results in expected availability counts and doesn't remove span"); + + removed = false; + rc = planner_multi_reduce_span (ctx, span1, reduce1, resource_types, + 5, removed); + avail1 = planner_multi_avail_resources_at (ctx, 0, 0); + bo = (bo || avail1 != 9 || !removed || rc != 0); + ok (!bo, "two partial reductions with appropriate removals totally remove span"); + + removed = false; + rc = planner_multi_reduce_span (ctx, span2, reduce2, resource_types1, + 3, removed); + avail2 = planner_multi_avail_resources_at (ctx, 0, 1); + bo = (bo || avail2 != 20 || removed || rc != 0); + ok (!bo, "underspecified and reordered reduction types is handled correctly"); + + removed = false; + rc = planner_multi_reduce_span (ctx, span2, reduce3, resource_types2, + 7, removed); + avail2 = planner_multi_avail_resources_at (ctx, 0, 2); + bo = (bo || avail2 != 27 || removed || rc != -1); + ok (!bo, "incorrect resource type reduction does not change availability"); + + removed = false; + rc = planner_multi_reduce_span (ctx, span2, reduce4, resource_types3, + 5, removed); + avail2 = planner_multi_avail_resources_at (ctx, 0, 3); + bo = (bo || avail2 != 39 || removed || rc != 0); + ok (!bo, "reordered partial reduction results in correct availability"); + + removed = false; + rc = planner_multi_reduce_span (ctx, span2, reduce5, resource_types4, + 1, removed); + avail2 
= planner_multi_avail_resources_at (ctx, 0, 3); + bo = (bo || avail2 != 40 || !removed || rc != 0); + ok (!bo, "removing final span resource completely removes span"); + + span3 = planner_multi_add_span (ctx, 0, 2000, resource_totals, len); + avail1 = planner_multi_avail_resources_at (ctx, 1000, 0); + avail2 = planner_multi_avail_resources_at (ctx, 1000, 1); + avail3 = planner_multi_avail_resources_at (ctx, 1000, 2); + avail4 = planner_multi_avail_resources_at (ctx, 1000, 3); + avail5 = planner_multi_avail_resources_at (ctx, 1000, 4); + bo = (bo || avail1 != 0 || avail2 != 0 || avail3 != 0 || avail4 != 0 + || avail5 != 0 || !removed || rc != 0); + ok (!bo, "can fully allocate resources after partial removals"); + + span4 = planner_multi_add_span (ctx, 3000, 1000, request3, len); + rc = planner_multi_reduce_span (ctx, span4, reduce5, resource_types5, + 1, removed1); + rc = planner_multi_reduce_span (ctx, span4, reduce5, resource_types5, + 1, removed2); + rc = planner_multi_reduce_span (ctx, span4, reduce5, resource_types6, + 1, removed3); + rc = planner_multi_reduce_span (ctx, span4, reduce5, resource_types6, + 1, removed4); + avail6 = planner_multi_avail_resources_at (ctx, 3500, 0); + avail7 = planner_multi_avail_resources_at (ctx, 3500, 1); + bo = (bo || avail6 != 10 || avail7 != 20 || removed1 || removed2 + || removed3 || !removed4 || rc != 0); + ok (!bo, "series of partial removals fully removes span"); + + planner_multi_destroy (&ctx); + + return 0; + +} + int main (int argc, char *argv[]) { - plan (98); + plan (106); test_multi_basics (); @@ -719,6 +822,8 @@ int main (int argc, char *argv[]) test_multi_update (); + test_partial_cancel (); + done_testing (); return EXIT_SUCCESS; From fdb7b1de9b04a8c638df5826d4b0866e72cadd65 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Sun, 7 Apr 2024 22:34:53 -0700 Subject: [PATCH 09/24] traverser: correct typo in match_kind_t Problem: PRISTINE is spelled PRESTINE in match_kind_t. Correct the spelling. --- resource/traversers/dfu_impl.cpp | 2 +- resource/traversers/dfu_impl.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resource/traversers/dfu_impl.cpp b/resource/traversers/dfu_impl.cpp index 4cf44a89e..09d9dceb0 100644 --- a/resource/traversers/dfu_impl.cpp +++ b/resource/traversers/dfu_impl.cpp @@ -342,7 +342,7 @@ const std::vector &dfu_impl_t::test (vtx_t u, pristine = false; ret = &(match_resources->with); } else { - spec = pristine? match_kind_t::PRESTINE_NONE_MATCH + spec = pristine? match_kind_t::PRISTINE_NONE_MATCH : match_kind_t::NONE_MATCH; } diff --git a/resource/traversers/dfu_impl.hpp b/resource/traversers/dfu_impl.hpp index 140a553fc..7a43426d5 100644 --- a/resource/traversers/dfu_impl.hpp +++ b/resource/traversers/dfu_impl.hpp @@ -38,7 +38,7 @@ enum class visit_t { DFV, UPV }; enum class match_kind_t { RESOURCE_MATCH, SLOT_MATCH, NONE_MATCH, - PRESTINE_NONE_MATCH }; + PRISTINE_NONE_MATCH }; struct jobmeta_t { From 0bad31bf94bf4ebbd6ed42199e72d1b59b93fd79 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Mon, 8 Apr 2024 00:58:11 -0700 Subject: [PATCH 10/24] traverser: add partial cancellation functionality Problem: Fluxion issue https://github.com/flux-framework/flux-sched/issues/1151 and flux-core issue https://github.com/flux-framework/flux-core/issues/4312 identified the need for partial release of resources. The current functionality need is to release all resources managed by a single broker rank. In the future support for releasing arbitrary subgraphs will be needed for cloud and converged use cases. 
Modify the rem_* traverser functions to take a modification type and type_to_count unordered_map. Add logic in the recursive job modification calls to distinguish between a full and partial job cancellation and issue corresponding planner interface calls, handling errors as needed. Switch cancallation behavior based on the job_modify_t enum class. --- resource/traversers/dfu.cpp | 18 ++ resource/traversers/dfu.hpp | 16 ++ resource/traversers/dfu_impl.hpp | 36 ++- resource/traversers/dfu_impl_update.cpp | 304 +++++++++++++++++++----- 4 files changed, 307 insertions(+), 67 deletions(-) diff --git a/resource/traversers/dfu.cpp b/resource/traversers/dfu.cpp index ea274481d..8f9e3a606 100644 --- a/resource/traversers/dfu.cpp +++ b/resource/traversers/dfu.cpp @@ -468,6 +468,24 @@ int dfu_traverser_t::remove (int64_t jobid) return detail::dfu_impl_t::remove (root, jobid); } +int dfu_traverser_t::remove (const std::string &R_to_cancel, + std::shared_ptr &reader, + int64_t jobid, bool &full_cancel) +{ + const subsystem_t &dom = get_match_cb ()->dom_subsystem (); + if (!get_graph () || !get_graph_db () + || get_graph_db ()->metadata.roots.find (dom) + == get_graph_db ()->metadata.roots.end () + || !get_match_cb ()) { + errno = EINVAL; + return -1; + } + + vtx_t root = get_graph_db ()->metadata.roots.at (dom); + return detail::dfu_impl_t::remove (root, R_to_cancel, reader, jobid, + full_cancel); +} + int dfu_traverser_t::mark (const std::string &root_path, resource_pool_t::status_t status) { diff --git a/resource/traversers/dfu.hpp b/resource/traversers/dfu.hpp index ccf424dd5..a1a043b81 100644 --- a/resource/traversers/dfu.hpp +++ b/resource/traversers/dfu.hpp @@ -164,6 +164,22 @@ class dfu_traverser_t : protected detail::dfu_impl_t */ int remove (int64_t jobid); + /*! Remove the allocation/reservation referred to by jobid and update + * the resource state. + * + * \param R_to_cancel deallocation string such as written in JGF. + * \param reader reader object that deserialize str to update the + * graph + * \param jobid job id. + * \param full_cancel bool indicating if the partial cancel cancelled all + * job resources + * \return 0 on success; -1 on error. + * EINVAL: graph, roots or match callback not set. + */ + int remove (const std::string &to_cancel, + std::shared_ptr &reader, + int64_t jobid, bool &full_cancel); + /*! Mark the resource status up|down|etc starting at subtree_root. * * \param root_path path to the root of the subtree to update. diff --git a/resource/traversers/dfu_impl.hpp b/resource/traversers/dfu_impl.hpp index 7a43426d5..09b96aa83 100644 --- a/resource/traversers/dfu_impl.hpp +++ b/resource/traversers/dfu_impl.hpp @@ -292,6 +292,21 @@ class dfu_impl_t { */ int remove (vtx_t root, int64_t jobid); + /*! Remove the allocation/reservation referred to by jobid and update + * the resource state. + * + * \param root root resource vertex. + * \param to_cancel deallocation string such as written in JGF. + * \param reader reader object that deserialize str to update the graph + * \param jobid job id. + * \param full_cancel bool indicating if the partial cancel cancelled all + * job resources + * \return 0 on success; -1 on error. + */ + int remove (vtx_t root, const std::string &to_cancel, + std::shared_ptr &reader, + int64_t jobid, bool &full_cancel); + /*! Update the resource status to up|down|etc starting at subtree_root. * * \param root_path path to the root of the subtree to update. 
@@ -474,14 +489,19 @@ class dfu_impl_t { unsigned int needs, bool excl, const jobmeta_t &jobmeta, bool full, std::map &to_parent, bool emit_shadow); - - int rem_txfilter (vtx_t u, int64_t jobid, bool &stop); - int rem_agfilter (vtx_t u, int64_t jobid, const std::string &s); - int rem_idata (vtx_t u, int64_t jobid, const std::string &s, bool &stop); - int rem_plan (vtx_t u, int64_t jobid); - int rem_upv (vtx_t u, int64_t jobid); - int rem_dfv (vtx_t u, int64_t jobid); - int rem_exv (int64_t jobid); + bool rem_tag (vtx_t u, int64_t jobid); + int rem_exclusive_filter (vtx_t u, int64_t jobid, + const modify_data_t &mod_data); + int mod_agfilter (vtx_t u, int64_t jobid, const std::string &s, + const modify_data_t &mod_data, bool &stop); + int mod_idata (vtx_t u, int64_t jobid, const std::string &s, + const modify_data_t &mod_data, bool &stop); + int mod_plan (vtx_t u, int64_t jobid, modify_data_t &mod_data); + int mod_upv (vtx_t u, int64_t jobid, const modify_data_t &mod_data); + int mod_dfv (vtx_t u, int64_t jobid, modify_data_t &mod_data); + int mod_exv (int64_t jobid, const modify_data_t &mod_data); + int cancel_vertex (vtx_t vtx, modify_data_t &mod_data, + int64_t jobid); /************************************************************************ diff --git a/resource/traversers/dfu_impl_update.cpp b/resource/traversers/dfu_impl_update.cpp index 60ce38470..710ee07b4 100644 --- a/resource/traversers/dfu_impl_update.cpp +++ b/resource/traversers/dfu_impl_update.cpp @@ -78,7 +78,7 @@ int dfu_impl_t::upd_agfilter (vtx_t u, const subsystem_t &s, count_relevant_types (subtree_plan, dfu, aggregate); span = planner_multi_add_span (subtree_plan, jobmeta.at, jobmeta.duration, - &(aggregate[0]), aggregate.size ()); + aggregate.data (), aggregate.size ()); if (span == -1) { m_err_msg += __FUNCTION__; m_err_msg += ": planner_multi_add_span returned -1.\n"; @@ -348,29 +348,28 @@ int dfu_impl_t::upd_dfv (vtx_t u, std::shared_ptr &writers, excl, n_plans, jobmeta, full, dfu, to_parent); } -int dfu_impl_t::rem_txfilter (vtx_t u, int64_t jobid, bool &stop) +int dfu_impl_t::rem_exclusive_filter (vtx_t u, int64_t jobid, + const modify_data_t &mod_data) { int rc = -1; int64_t span = -1; planner_t *x_checker = NULL; - auto &x_spans = (*m_graph)[u].idata.x_spans; - auto &tags = (*m_graph)[u].idata.tags; - if (tags.find (jobid) == tags.end ()) { - stop = true; - rc = 0; - goto done; - } - if (x_spans.find (jobid) == x_spans.end ()) { - m_err_msg += __FUNCTION__; - m_err_msg += ": jobid isn't found in x_spans table.\n "; - goto done; + auto span_it = (*m_graph)[u].idata.x_spans.find (jobid); + if (span_it == (*m_graph)[u].idata.x_spans.end ()) { + if (mod_data.mod_type != job_modify_t::PARTIAL_CANCEL) { + m_err_msg += __FUNCTION__; + m_err_msg += ": jobid isn't found in x_spans table.\n "; + goto done; + } else { + rc = 0; + goto done; + } } x_checker = (*m_graph)[u].idata.x_checker; - (*m_graph)[u].idata.tags.erase (jobid); - span = (*m_graph)[u].idata.x_spans[jobid]; - (*m_graph)[u].idata.x_spans.erase (jobid); + span = span_it->second; + (*m_graph)[u].idata.x_spans.erase (span_it); if ( (rc = planner_rem_span (x_checker, span)) == -1) { m_err_msg += __FUNCTION__; m_err_msg += "planner_rem_span returned -1.\n"; @@ -383,113 +382,223 @@ int dfu_impl_t::rem_txfilter (vtx_t u, int64_t jobid, bool &stop) return rc; } -int dfu_impl_t::rem_agfilter (vtx_t u, int64_t jobid, - const std::string &subsystem) +bool dfu_impl_t::rem_tag (vtx_t u, int64_t jobid) +{ + auto tag_it = (*m_graph)[u].idata.tags.find (jobid); + if 
(tag_it == (*m_graph)[u].idata.tags.end ()) { + // stop removal + return true; + } else { + (*m_graph)[u].idata.tags.erase (tag_it); + return false; + } +} + +int dfu_impl_t::mod_agfilter (vtx_t u, int64_t jobid, + const std::string &subsystem, + const modify_data_t &mod_data, + bool &stop) { int rc = 0; - int span = -1; + bool removed = false; planner_multi_t *subtree_plan = NULL; auto &job2span = (*m_graph)[u].idata.job2span; + std::map::iterator span_it; - if ((subtree_plan = (*m_graph)[u].idata.subplans[subsystem]) == NULL) + if ( (subtree_plan = (*m_graph)[u].idata.subplans[subsystem]) == NULL) goto done; - if (job2span.find (jobid) == job2span.end ()) + + span_it = job2span.find (jobid); + if (span_it == job2span.end ()) { + if (mod_data.mod_type == job_modify_t::PARTIAL_CANCEL) + stop = true; goto done; - if ((span = job2span[jobid]) == -1) { + } + if (span_it->second == -1) { rc = -1; goto done; } - if ((rc = planner_multi_rem_span (subtree_plan, span)) != 0) { - m_err_msg += __FUNCTION__; - m_err_msg += ": planner_multi_rem_span returned -1.\n"; - m_err_msg += (*m_graph)[u].name + ".\n"; - m_err_msg += strerror (errno); - m_err_msg += "\n"; + if (mod_data.mod_type != job_modify_t::PARTIAL_CANCEL) { + if ( (rc = planner_multi_rem_span (subtree_plan, + span_it->second)) != 0) { + m_err_msg += __FUNCTION__; + m_err_msg += ": planner_multi_rem_span returned -1.\n"; + m_err_msg += (*m_graph)[u].name + ".\n"; + m_err_msg += strerror (errno); + m_err_msg += "\n"; + goto done; + } + job2span.erase (span_it); + } else { // PARTIAL_CANCEL + if ( (*m_graph)[u].idata.tags.find (jobid) + == (*m_graph)[u].idata.tags.end ()) { + // stop removal + stop = true; + goto done; + } + // If not a default/root rank and the rank is still in the graph + // but don't remove exclusive filter as allocation may be exclusive + // at the subgraph rooted here. 
+ if ( (mod_data.ranks_removed.find ((*m_graph)[u].rank) + == mod_data.ranks_removed.end ()) + && (*m_graph)[u].rank != -1) { + stop = true; + goto done; + } + if (mod_data.type_to_count.size () > 0) { + std::vector reduced_types; + std::vector reduced_counts; + for (const auto &t2ct_it : mod_data.type_to_count) { + reduced_types.push_back (t2ct_it.first); + reduced_counts.push_back (t2ct_it.second); + } + if ( (rc = planner_multi_reduce_span (subtree_plan, + span_it->second, + reduced_counts.data (), + reduced_types.data (), + mod_data.type_to_count.size (), + removed)) != 0) { + m_err_msg += __FUNCTION__; + m_err_msg += ": planner_multi_reduce_span returned -1.\n"; + m_err_msg += (*m_graph)[u].name + ".\n"; + m_err_msg += strerror (errno); + m_err_msg += "\n"; + goto done; + } + } else { + m_err_msg += __FUNCTION__; + m_err_msg += ": type_to_count empty.\n"; + m_err_msg += (*m_graph)[u].name + ".\n"; + m_err_msg += strerror (errno); + m_err_msg += "\n"; + rc = -1; + goto done; + } + if (removed) { + // Fully removed; need to remove job2span and tag + job2span.erase (span_it); + rem_tag (u, jobid); + } + rc = rem_exclusive_filter (u, jobid, mod_data); } - job2span.erase (jobid); done: return rc; } -int dfu_impl_t::rem_idata (vtx_t u, int64_t jobid, - const std::string &subsystem, bool &stop) +int dfu_impl_t::mod_idata (vtx_t u, int64_t jobid, + const std::string &subsystem, + const modify_data_t &mod_data, + bool &stop) { - int rc = -1; - - if ( (rc = rem_txfilter (u, jobid, stop)) != 0 || stop) - goto done; - if ( (rc = rem_agfilter (u, jobid, subsystem)) != 0) - goto done; - -done: - return rc; + // Only remove the txfilter span and tag first if we're completely + // cancelling the vertex + if (mod_data.mod_type != job_modify_t::PARTIAL_CANCEL) { + // returns true if stopping + if ( (stop = rem_tag (u, jobid))) + return 0; + if (rem_exclusive_filter (u, jobid, mod_data) != 0) + return -1; + } + // If mod_type == job_modify_t::PARTIAL_CANCEL here, + // job_mod_agfilter determines if all resources are removed. If so, + // job_mod_agfilter will then call rem_tag. 
+ return mod_agfilter (u, jobid, subsystem, mod_data, stop); } -int dfu_impl_t::rem_plan (vtx_t u, int64_t jobid) +int dfu_impl_t::mod_plan (vtx_t u, int64_t jobid, + modify_data_t &mod_data) { int rc = 0; int64_t span = -1; + int64_t prev_count = -1; + int64_t to_remove = 0; + bool removed = false; + std::map::iterator alloc_span; + std::map::iterator res_span; planner_t *plans = NULL; - if ((*m_graph)[u].schedule.allocations.find (jobid) - != (*m_graph)[u].schedule.allocations.end ()) { - span = (*m_graph)[u].schedule.allocations[jobid]; - (*m_graph)[u].schedule.allocations.erase (jobid); - } else if ((*m_graph)[u].schedule.reservations.find (jobid) + alloc_span = (*m_graph)[u].schedule.allocations.find (jobid); + if (alloc_span != (*m_graph)[u].schedule.allocations.end ()) { + span = alloc_span->second; + if (mod_data.mod_type != job_modify_t::PARTIAL_CANCEL) { + (*m_graph)[u].schedule.allocations.erase (alloc_span); + } + } else if ( (res_span = (*m_graph)[u].schedule.reservations.find (jobid)) != (*m_graph)[u].schedule.reservations.end ()) { - span = (*m_graph)[u].schedule.reservations[jobid]; - (*m_graph)[u].schedule.reservations.erase (jobid); + span = res_span->second; + // Can't be PARTIAL_CANCEL + (*m_graph)[u].schedule.reservations.erase (res_span); } else { goto done; } plans = (*m_graph)[u].schedule.plans; - if ( (rc = planner_rem_span (plans, span)) == -1) { + if (mod_data.mod_type != job_modify_t::PARTIAL_CANCEL) { + if (mod_data.mod_type == job_modify_t::VTX_CANCEL) + prev_count = planner_span_resource_count (plans, span); + if ( (rc = planner_rem_span (plans, span)) == -1) { + m_err_msg += __FUNCTION__; + m_err_msg += ": planner_rem_span returned -1.\n"; + m_err_msg += (*m_graph)[u].name + ".\n"; + m_err_msg += strerror (errno); + m_err_msg += ".\n"; + goto done; + } + // Accumulate counts per type to partially remove from filters + if (mod_data.mod_type == job_modify_t::VTX_CANCEL) { + mod_data.type_to_count[(*m_graph)[u].type.c_str ()] += prev_count; + } + } else { // PARTIAL_CANCEL m_err_msg += __FUNCTION__; - m_err_msg += ": planner_rem_span returned -1.\n"; + m_err_msg += ": traverser tried to remove schedule and span"; + m_err_msg += " after vtx_cancel during partial cancel:\n"; m_err_msg += (*m_graph)[u].name + ".\n"; m_err_msg += strerror (errno); m_err_msg += ".\n"; + rc = -1; } done: return rc; } -int dfu_impl_t::rem_upv (vtx_t u, int64_t jobid) +int dfu_impl_t::mod_upv (vtx_t u, int64_t jobid, + const modify_data_t &mod_data) { // NYI: remove schedule data for upwalk return 0; } -int dfu_impl_t::rem_dfv (vtx_t u, int64_t jobid) +int dfu_impl_t::mod_dfv (vtx_t u, int64_t jobid, + modify_data_t &mod_data) { int rc = 0; bool stop = false; const std::string &dom = m_match->dom_subsystem (); f_out_edg_iterator_t ei, ei_end; - if ( (rc = rem_idata (u, jobid, dom, stop)) != 0 || stop) + if ( (rc = mod_idata (u, jobid, dom, mod_data, + stop)) != 0 || stop) goto done; - if ( (rc = rem_plan (u, jobid)) != 0) + if ( (rc = mod_plan (u, jobid, mod_data)) != 0) goto done; - for (auto &subsystem : m_match->subsystems ()) { + for (auto const &subsystem : m_match->subsystems ()) { for (tie (ei, ei_end) = out_edges (u, *m_graph); ei != ei_end; ++ei) { if (!in_subsystem (*ei, subsystem) || stop_explore (*ei, subsystem)) continue; vtx_t tgt = target (*ei, *m_graph); if (subsystem == dom) - rc += rem_dfv (tgt, jobid); + rc += mod_dfv (tgt, jobid, mod_data); else - rc += rem_upv (tgt, jobid); + rc += mod_upv (tgt, jobid, mod_data); } } done: return rc; } -int dfu_impl_t::rem_exv 
(int64_t jobid) +int dfu_impl_t::mod_exv (int64_t jobid, const modify_data_t &mod_data) { int rc = -1; int64_t span = -1; @@ -531,7 +640,23 @@ int dfu_impl_t::rem_exv (int64_t jobid) return (!rc)? 0 : -1; } +int dfu_impl_t::cancel_vertex (vtx_t vtx, modify_data_t &mod_data, + int64_t jobid) +{ + int rc = -1; + bool stop = false; + const std::string &dom = m_match->dom_subsystem (); + + if ( (rc = mod_idata (vtx, jobid, dom, mod_data, stop)) == -1) { + errno = EINVAL; + return rc; + } + if ( (rc = mod_plan (vtx, jobid, mod_data)) == -1) + errno = EINVAL; + return rc; + +} //////////////////////////////////////////////////////////////////////////////// // DFU Traverser Implementation Update API @@ -644,8 +769,69 @@ int dfu_impl_t::remove (vtx_t root, int64_t jobid) { bool root_has_jtag = ((*m_graph)[root].idata.tags.find (jobid) != (*m_graph)[root].idata.tags.end ()); + modify_data_t mod_data; + mod_data.mod_type = job_modify_t::CANCEL; m_color.reset (); - return (root_has_jtag)? rem_dfv (root, jobid) : rem_exv (jobid); + return (root_has_jtag)? mod_dfv (root, jobid, mod_data) + : mod_exv (jobid, mod_data); +} + +int dfu_impl_t::remove (vtx_t root, const std::string &R_to_cancel, + std::shared_ptr &reader, + int64_t jobid, bool &full_cancel) +{ + int rc = -1; + modify_data_t mod_data; + resource_graph_t &g = m_graph_db->resource_graph; + resource_graph_metadata_t &m = m_graph_db->metadata; + + if (reader->partial_cancel (g, m, mod_data, R_to_cancel, + jobid) != 0) { + m_err_msg += __FUNCTION__; + m_err_msg += ": partial_cancel returned error.\n"; + return -1; + } + + // If type_to_count size is 0, reader was not JGF + if (mod_data.type_to_count.size () == 0) { + // Set modify type to be vertex cancel + mod_data.mod_type = job_modify_t::VTX_CANCEL; + for (const int64_t &rank : mod_data.ranks_removed) { + auto rank_vector = m.by_rank.find (rank); + if (rank_vector == m.by_rank.end ()) { + m_err_msg += __FUNCTION__; + m_err_msg += ": rank not found in by_rank map.\n"; + return -1; + } + for (const vtx_t &vtx : rank_vector->second) { + // Cancel the vertex if it has job tag. Not necessary + // but reduces number of checks before function return + if ( (*m_graph)[vtx].idata.tags.find (jobid) + != (*m_graph)[vtx].idata.tags.end ()) { + if ( (rc = cancel_vertex (vtx, mod_data, jobid)) != 0) { + errno = EINVAL; + return rc; + } + } + } + } + } + + bool root_has_jtag = ((*m_graph)[root].idata.tags.find (jobid) + != (*m_graph)[root].idata.tags.end ()); + // Now partial cancel DFV from graph root + mod_data.mod_type = job_modify_t::PARTIAL_CANCEL; + m_color.reset (); + if (root_has_jtag) { + rc = mod_dfv (root, jobid, mod_data); + // Was the root vertex's job tag removed? If so, full_cancel + full_cancel = ((*m_graph)[root].idata.tags.find (jobid) + == (*m_graph)[root].idata.tags.end ()); + } else { + rc = mod_exv (jobid, mod_data); + } + + return rc; } int dfu_impl_t::mark (const std::string &root_path, From 680669ef7225aac90011aec7b28a3b1295cb5684 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Tue, 21 May 2024 18:41:32 -0700 Subject: [PATCH 11/24] reader: explicit return checking for subgraph removal Problem: the JGF reader subgraph removal function does not do explicit return checking for the helper functions. Add the checking to reduce the likelihood of error propagation. 
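Stepping back before the reader changes below: the reader-level partial_cancel from patch 01 and the traverser remove overload from patch 10 are meant to be driven together when a free request arrives carrying an R fragment. The following caller-side sketch is not part of any patch in this series; it assumes an already-initialized traverser and reader, and the function and variable names are illustrative only.

#include <memory>
#include <string>
#include "resource/traversers/dfu.hpp"
#include "resource/readers/resource_reader_base.hpp"

using namespace Flux::resource_model;

// Hypothetical helper: release the ranks named in R_freed from jobid's
// allocation, reporting whether the job is now fully released.
int partial_release (dfu_traverser_t &traverser,
                     std::shared_ptr<resource_reader_base_t> &reader,
                     int64_t jobid, const std::string &R_freed)
{
    bool full_cancel = false;
    // The reader unpacks the freed ranks from R_freed; the traverser then
    // walks the graph top-down and reduces the planner/agfilter spans for
    // exactly those ranks.
    if (traverser.remove (R_freed, reader, jobid, full_cancel) < 0)
        return -1;
    if (full_cancel) {
        // Every resource held by jobid has been released; the caller can
        // retire its own bookkeeping for the job here.
    }
    return 0;
}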
--- resource/readers/resource_reader_jgf.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/resource/readers/resource_reader_jgf.cpp b/resource/readers/resource_reader_jgf.cpp index a0bf8deb3..26b6754b5 100644 --- a/resource/readers/resource_reader_jgf.cpp +++ b/resource/readers/resource_reader_jgf.cpp @@ -1280,14 +1280,13 @@ int resource_reader_jgf_t::remove_subgraph (resource_graph_t &g, get_subgraph_vertices (g, subgraph_root_vtx, vtx_list); - if ( get_parent_vtx (g, subgraph_root_vtx, parent_vtx) ) + if (get_parent_vtx (g, subgraph_root_vtx, parent_vtx) != 0) return -1; - if ( remove_metadata_outedges (parent_vtx, subgraph_root_vtx, g, m) ) + if (remove_metadata_outedges (parent_vtx, subgraph_root_vtx, g, m) != 0) return -1; - for (auto & vtx : vtx_list) - { + for (auto & vtx : vtx_list) { // clear vertex edges but don't delete vertex boost::clear_vertex (vtx, g); remove_graph_metadata (vtx, g, m); From 0af606d6a5277e4250892eef439db11fadd46dfa Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Sat, 25 May 2024 23:16:58 -0700 Subject: [PATCH 12/24] readers: add experimental partial cancel support for JGF Problem: JGF partial cancellation is more complex and flexible than RV1exec. Unlike RV1exec, there is no top-level metadata that tracks the ranks to be canceled. That means the full JGF string needs to be unpacked, and each vertex fetched and cancelled individually. Add the capability to cancel each vertex in the JGF subgraph and track the number and types of resources to be removed to pass back to the traverser for its top-down removal. This functionality is currently experimental and will support cloud deployments. The implementation can be extended in a straightforward way to arbitrary resource release. --- resource/readers/resource_reader_jgf.cpp | 163 +++++++++++++++++++---- resource/readers/resource_reader_jgf.hpp | 34 +++-- 2 files changed, 160 insertions(+), 37 deletions(-) diff --git a/resource/readers/resource_reader_jgf.cpp b/resource/readers/resource_reader_jgf.cpp index 26b6754b5..c66fef151 100644 --- a/resource/readers/resource_reader_jgf.cpp +++ b/resource/readers/resource_reader_jgf.cpp @@ -725,8 +725,7 @@ int resource_reader_jgf_t::find_vtx (resource_graph_t &g, int resource_reader_jgf_t::update_vtx_plan (vtx_t v, resource_graph_t &g, const fetch_helper_t &fetcher, - uint64_t jobid, int64_t at, - uint64_t dur, bool rsv) + jgf_updater_data &update_data) { int rc = -1; int64_t span = -1; @@ -739,7 +738,9 @@ int resource_reader_jgf_t::update_vtx_plan (vtx_t v, resource_graph_t &g, m_err_msg += ": plan for " + g[v].name + " is null.\n"; goto done; } - if ( (avail = planner_avail_resources_during (plans, at, dur)) == -1) { + if ( (avail = planner_avail_resources_during (plans, + update_data.at, + update_data.duration)) == -1) { m_err_msg += __FUNCTION__; m_err_msg += ": planner_avail_resource_during return -1 for "; m_err_msg += g[v].name + ".\n"; @@ -749,16 +750,17 @@ int resource_reader_jgf_t::update_vtx_plan (vtx_t v, resource_graph_t &g, if (fetcher.exclusive) { // Update the vertex plan here (not in traverser code) so vertices // that the traverser won't walk still get their plans updated. 
- if ( (span = planner_add_span (plans, at, dur, - static_cast (g[v].size))) == -1) { + if ( (span = planner_add_span (plans, update_data.at, + update_data.duration, + static_cast (g[v].size))) == -1) { m_err_msg += __FUNCTION__; m_err_msg += ": can't add span into " + g[v].name + ".\n"; goto done; } - if (rsv) - g[v].schedule.reservations[jobid] = span; + if (update_data.reserved) + g[v].schedule.reservations[update_data.jobid] = span; else - g[v].schedule.allocations[jobid] = span; + g[v].schedule.allocations[update_data.jobid] = span; } else { if (avail < g[v].size) { // if g[v] has already been allocated/reserved, this is an error @@ -773,12 +775,81 @@ int resource_reader_jgf_t::update_vtx_plan (vtx_t v, resource_graph_t &g, return rc; } +int resource_reader_jgf_t::cancel_vtx (vtx_t vtx, resource_graph_t &g, + resource_graph_metadata_t &m, + const fetch_helper_t &fetcher, + jgf_updater_data &update_data) +{ + int rc = -1; + int64_t span = -1; + int64_t xspan = -1; + int64_t sched_span = -1; + int64_t prev_avail = -1; + planner_multi_t *subtree_plan = NULL; + planner_t *x_checker = NULL; + planner_t *plans = NULL; + auto &job2span = g[vtx].idata.job2span; + auto &x_spans = g[vtx].idata.x_spans; + auto &tags = g[vtx].idata.tags; + std::map::iterator span_it; + std::map::iterator xspan_it; + + // remove from aggregate filter if present + auto agg_span = job2span.find (update_data.jobid); + if (agg_span != job2span.end ()) { + if ((subtree_plan = g[vtx].idata.subplans["containment"]) == NULL) + goto ret; + if (planner_multi_rem_span (subtree_plan, agg_span->second) != 0) + goto ret; + // Delete from job2span tracker + job2span.erase (update_data.jobid); + } + + // remove from exclusive filter; + xspan_it = x_spans.find (update_data.jobid); + if (xspan_it == x_spans.end ()) { + errno = EINVAL; + goto ret; + } + xspan = xspan_it->second; + x_checker = g[vtx].idata.x_checker; + g[vtx].idata.tags.erase (update_data.jobid); + g[vtx].idata.x_spans.erase (update_data.jobid); + if (planner_rem_span (x_checker, xspan) == -1) { + errno = EINVAL; + goto ret; + } + // rem plan + span_it = g[vtx].schedule.allocations.find (update_data.jobid); + sched_span = span_it->second; + if (span_it != g[vtx].schedule.allocations.end ()) { + g[vtx].schedule.allocations.erase (update_data.jobid); + } else { + errno = EINVAL; + goto ret; + } + plans = g[vtx].schedule.plans; + prev_avail = planner_avail_resources_at (plans, 0); + if (planner_rem_span (plans, sched_span) == -1) { + errno = EINVAL; + goto ret; + } + // Add the newly freed counts, Can't assume it freed everything. 
+ update_data.type_to_count[g[vtx].type.c_str ()] + += (planner_avail_resources_at (plans, 0) - prev_avail); + update_data.ranks.insert (g[vtx].rank); + + rc = 0; + +ret: + return rc; +} + int resource_reader_jgf_t::update_vtx (resource_graph_t &g, resource_graph_metadata_t &m, std::map &vmap, const fetch_helper_t &fetcher, - uint64_t jobid, int64_t at, - uint64_t dur, bool rsv) + jgf_updater_data &update_data) { int rc = -1; std::map root_checks; @@ -791,8 +862,13 @@ int resource_reader_jgf_t::update_vtx (resource_graph_t &g, goto done; if ( (rc = update_vmap (vmap, v, root_checks, fetcher)) != 0) goto done; - if ( (rc = update_vtx_plan (v, g, fetcher, jobid, at, dur, rsv)) != 0) - goto done; + if (update_data.update) { + if ( (rc = update_vtx_plan (v, g, fetcher, update_data)) != 0) + goto done; + } else { + if ( (rc = cancel_vtx (v, g, m, fetcher, update_data)) != 0) + goto done; + } done: return rc; @@ -800,7 +876,7 @@ int resource_reader_jgf_t::update_vtx (resource_graph_t &g, int resource_reader_jgf_t::undo_vertices (resource_graph_t &g, std::map &vmap, - uint64_t jobid, bool rsv) + jgf_updater_data &update_data) { int rc = 0; int rc2 = 0; @@ -813,12 +889,12 @@ int resource_reader_jgf_t::undo_vertices (resource_graph_t &g, continue; try { v = kv.second.v; - if (rsv) { - span = g[v].schedule.reservations.at (jobid); - g[v].schedule.reservations.erase (jobid); + if (update_data.reserved) { + span = g[v].schedule.reservations.at (update_data.jobid); + g[v].schedule.reservations.erase (update_data.jobid); } else { - span = g[v].schedule.allocations.at (jobid); - g[v].schedule.allocations.erase (jobid); + span = g[v].schedule.allocations.at (update_data.jobid); + g[v].schedule.allocations.erase (update_data.jobid); } plans = g[v].schedule.plans; @@ -886,9 +962,8 @@ int resource_reader_jgf_t::update_vertices (resource_graph_t &g, resource_graph_metadata_t &m, std::map &vmap, - json_t *nodes, int64_t jobid, - int64_t at, uint64_t dur, - bool rsv) + json_t *nodes, + jgf_updater_data &update_data) { int rc = -1; unsigned int i = 0; @@ -898,7 +973,7 @@ int resource_reader_jgf_t::update_vertices (resource_graph_t &g, fetcher.scrub (); if ( (rc = unpack_vtx (json_array_get (nodes, i), fetcher)) != 0) goto done; - if ( (rc = update_vtx (g, m, vmap, fetcher, jobid, at, dur, rsv)) != 0) + if ( (rc = update_vtx (g, m, vmap, fetcher, update_data)) != 0) goto done; } rc = 0; @@ -1236,6 +1311,7 @@ int resource_reader_jgf_t::update (resource_graph_t &g, json_t *nodes = NULL; json_t *edges = NULL; std::map vmap; + jgf_updater_data update_data; if (at < 0 || dur == 0) { errno = EINVAL; @@ -1245,10 +1321,18 @@ int resource_reader_jgf_t::update (resource_graph_t &g, + std::to_string (dur) + ").\n"; goto done; } + + // Fill in updater data + update_data.jobid = jobid; + update_data.at = at; + update_data.duration = dur; + update_data.reserved = rsv; + update_data.update = true; + if ( (rc = fetch_jgf (str, &jgf, &nodes, &edges)) != 0) goto done; - if ( (rc = update_vertices (g, m, vmap, nodes, jobid, at, dur, rsv)) != 0) { - undo_vertices (g, vmap, jobid, rsv); + if ( (rc = update_vertices (g, m, vmap, nodes, update_data)) != 0) { + undo_vertices (g, vmap, update_data); goto done; } if ( (rc = update_edges (g, m, vmap, edges, token)) != 0) @@ -1301,8 +1385,35 @@ int resource_reader_jgf_t::partial_cancel (resource_graph_t &g, modify_data_t &mod_data, const std::string &R, int64_t jobid) { - errno = ENOTSUP; // JGF reader does not support partial cancel - return -1; + int rc = -1; + json_t *jgf = NULL; + 
json_t *nodes = NULL; + json_t *edges = NULL; + std::map vmap; + jgf_updater_data p_cancel_data; + + if (jobid <= 0) { + errno = EINVAL; + m_err_msg += __FUNCTION__; + m_err_msg += ": invalid jobid\n"; + goto done; + } + + // Fill in updater data + p_cancel_data.jobid = jobid; + p_cancel_data.update = false; + + if ( (rc = fetch_jgf (R, &jgf, &nodes, &edges)) != 0) + goto done; + if ( (rc = update_vertices (g, m, vmap, nodes, p_cancel_data)) != 0) + goto done; + + mod_data.type_to_count = p_cancel_data.type_to_count; + mod_data.ranks_removed = p_cancel_data.ranks; + +done: + json_decref (jgf); + return rc; } bool resource_reader_jgf_t::is_allowlist_supported () diff --git a/resource/readers/resource_reader_jgf.hpp b/resource/readers/resource_reader_jgf.hpp index 6836fa429..7de01f29f 100644 --- a/resource/readers/resource_reader_jgf.hpp +++ b/resource/readers/resource_reader_jgf.hpp @@ -23,6 +23,19 @@ struct vmap_val_t; namespace Flux { namespace resource_model { +// Struct to track data for updates +struct jgf_updater_data { + int64_t jobid = 0; + int64_t at = 0; + uint64_t duration = 0; + bool reserved = false; + // track counts of resources to be cancelled + std::unordered_map type_to_count; + // track count of rank vertices to determine if rank + // should be removed from by_rank map + std::unordered_set ranks; + bool update = true; // Updating or partial cancel +}; /*! JGF resource reader class. */ @@ -139,27 +152,26 @@ class resource_reader_jgf_t : public resource_reader_base_t { std::map &vmap, const fetch_helper_t &fetcher, vtx_t &ret_v); int update_vtx_plan (vtx_t v, resource_graph_t &g, - const fetch_helper_t &fetcher, uint64_t jobid, - int64_t at, uint64_t dur, bool rsv); + const fetch_helper_t &fetcher, + jgf_updater_data &update_data); + int cancel_vtx (vtx_t v, resource_graph_t &g, + resource_graph_metadata_t &m, + const fetch_helper_t &fetcher, + jgf_updater_data &update_data); int update_vtx (resource_graph_t &g, resource_graph_metadata_t &m, std::map &vmap, - const fetch_helper_t &fetcher, uint64_t jobid, int64_t at, - uint64_t dur, bool rsv); + const fetch_helper_t &fetcher, + jgf_updater_data &updater_data); int unpack_vertices (resource_graph_t &g, resource_graph_metadata_t &m, std::map &vmap, json_t *nodes, std::unordered_set &added_vtcs); int undo_vertices (resource_graph_t &g, std::map &vmap, - uint64_t jobid, bool rsv); - int update_vertices (resource_graph_t &g, resource_graph_metadata_t &m, - std::map &vmap, - json_t *nodes, int64_t jobid, int64_t at, - uint64_t dur, bool rsv); + jgf_updater_data &updater_data); int update_vertices (resource_graph_t &g, resource_graph_metadata_t &m, std::map &vmap, - json_t *nodes, int64_t jobid, int64_t at, - uint64_t dur); + json_t *nodes, jgf_updater_data &updater_data); int unpack_edge (json_t *element, std::map &vmap, std::string &source, std::string &target, json_t **name); int update_src_edge (resource_graph_t &g, resource_graph_metadata_t &m, From a1c2d5e8b6512da7669fd783fe123b704c84a3b6 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Fri, 21 Jun 2024 01:57:13 -0700 Subject: [PATCH 13/24] resource-query: add support for partial-cancel Problem: resource-query doesn't have partial cancel functionality. Add the necessary functions, help information, and input parsing to enable sharness tests. 
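At the resource-query prompt the new command takes the form
partial-cancel jobid (jgf | rv1exec) R_to_cancel.file, matching the help text
added to command.cpp. The underlying call sequence performed by
do_partial_remove reduces to the sketch below; resource_reader_base_t,
create_resource_reader, and traverser->remove come from this patch series,
while jobid and R_to_cancel stand in for caller-supplied values:

    // Sketch of the partial-cancel flow (error reporting trimmed).
    std::shared_ptr<resource_reader_base_t> rd;
    bool full_cancel = false;

    if ( (rd = create_resource_reader ("rv1exec")) == nullptr)
        return -1;                                  // unsupported reader
    if (ctx->traverser->remove (R_to_cancel, rd, jobid, full_cancel) != 0)
        return -1;                                  // partial cancel failed
    if (full_cancel)
        ctx->allocations.erase (jobid);             // every rank released

Only a full cancel removes the job from the allocation table; otherwise the
job remains allocated on the ranks that were not named in the free payload.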
--- resource/utilities/command.cpp | 88 ++++++++++++++++++++++++++++++++++ resource/utilities/command.hpp | 2 + 2 files changed, 90 insertions(+) diff --git a/resource/utilities/command.cpp b/resource/utilities/command.cpp index 2d60a4dfa..803cccf5b 100644 --- a/resource/utilities/command.cpp +++ b/resource/utilities/command.cpp @@ -50,6 +50,8 @@ command_t commands[] = { "resource-query> find status=down and sched-now=allocated" }, { "cancel", "c", cmd_cancel, "Cancel an allocation or reservation: " "resource-query> cancel jobid" }, + { "partial-cancel", "pc", cmd_partial_cancel, "Partially release an allocation: " +"resource-query> partial-cancel jobid (file format: jgf | rv1exec) R_to_cancel.file" }, { "set-property", "p", cmd_set_property, "Add a property to a resource: " "resource-query> set-property resource PROPERTY=VALUE" }, { "get-property", "g", cmd_get_property, "Get all properties of a resource: " @@ -84,6 +86,26 @@ static int do_remove (std::shared_ptr &ctx, int64_t jobid) return rc; } +static int do_partial_remove (std::shared_ptr &ctx, + std::shared_ptr &reader, + int64_t jobid, const std::string &R_cancel, + bool &full_cancel) +{ + int rc = -1; + + if ( (rc = ctx->traverser->remove (R_cancel, reader, (int64_t)jobid, + full_cancel)) == 0) { + if (full_cancel && (ctx->jobs.find (jobid) != ctx->jobs.end ())) { + std::shared_ptr info = ctx->jobs[jobid]; + info->state = job_lifecycle_t::CANCELED; + } + } else { + std::cout << ctx->traverser->err_message (); + ctx->traverser->clear_err_message (); + } + return rc; +} + static void print_sat_info (std::shared_ptr &ctx, std::ostream &out, bool sat, double elapse, unsigned int pre, unsigned int post) @@ -600,6 +622,72 @@ int cmd_cancel (std::shared_ptr &ctx, return 0; } +int cmd_partial_cancel (std::shared_ptr &ctx, + std::vector &args) +{ + int rc = -1; + std::stringstream buffer{}; + std::shared_ptr rd; + + if (args.size () != 4) { + std::cerr << "ERROR: malformed command" << std::endl; + return 0; + } + + std::string jobid_str = args[1]; + std::string reader = args[2]; + std::ifstream cancel_file (args[3]); + uint64_t jobid = (uint64_t)std::strtoll (jobid_str.c_str (), NULL, 10); + bool full_cancel = false; + + if (!(reader == "jgf" || reader == "rv1exec")) { + std::cerr << "ERROR: unsupported reader " << args[2] << std::endl; + goto done; + } + + if (!cancel_file) { + std::cerr << "ERROR: can't open " << args[3] << std::endl; + goto done; + } + buffer << cancel_file.rdbuf (); + cancel_file.close (); + + if (reader == "rv1exec") { + if ( (rd = create_resource_reader ("rv1exec")) == nullptr) { + std::cerr << "ERROR: can't create rv1exec reader " << std::endl; + goto done; + } + } else { // must be JGF + if ( (rd = create_resource_reader ("jgf")) == nullptr) { + std::cerr << "ERROR: can't create rv1exec reader " << std::endl; + goto done; + } + } + + if (ctx->allocations.find (jobid) != ctx->allocations.end ()) { + if ( (rc = do_partial_remove (ctx, rd, jobid, buffer.str (), + full_cancel)) == 0) { + if (full_cancel) + ctx->allocations.erase (jobid); + } + } else if (ctx->reservations.find (jobid) != ctx->reservations.end ()) { + std::cerr << "ERROR: reservations not currently supported by partial cancel" + << std::endl; + goto done; + } else { + std::cerr << "ERROR: nonexistent job " << jobid << std::endl; + goto done; + } + + if (rc != 0) { + std::cerr << "ERROR: error encountered while removing job " + << jobid << std::endl; + } + +done: + return 0; +} + int cmd_set_property (std::shared_ptr &ctx, std::vector &args) { diff --git 
a/resource/utilities/command.hpp b/resource/utilities/command.hpp index 4081fc2c4..e083d6fe0 100644 --- a/resource/utilities/command.hpp +++ b/resource/utilities/command.hpp @@ -81,6 +81,8 @@ int cmd_find (std::shared_ptr &ctx, std::vector &args); int cmd_cancel (std::shared_ptr &ctx, std::vector &args); +int cmd_partial_cancel (std::shared_ptr &ctx, + std::vector &args); int cmd_set_property (std::shared_ptr &ctx, std::vector &args); int cmd_get_property (std::shared_ptr &ctx, From 4efe5e291ce7c3cce4a44b34559deeeee2629cfe Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Mon, 24 Jun 2024 11:25:41 -0700 Subject: [PATCH 14/24] testsuite: add partial cancel tests for rv1exec and JGF Problem: there are no sharness CI tests for partial cancel/release. Add tests for correct behavior. --- t/data/resource/commands/cancel/cmds03.in | 3 + t/data/resource/commands/cancel/cmds04.in | 8 + t/data/resource/commands/cancel/cmds05.in | 4 + t/data/resource/commands/cancel/cmds06.in | 9 + t/data/resource/commands/cancel/cmds07.in | 13 + t/data/resource/commands/cancel/cmds08.in | 8 + t/data/resource/commands/cancel/cmds09.in | 9 + t/data/resource/expected/cancel/015.R.out | 49 + t/data/resource/expected/cancel/016.R.out | 192 + t/data/resource/expected/cancel/017.R.out | 97 + t/data/resource/expected/cancel/018.R.out | 198 + t/data/resource/expected/cancel/019.R.out | 237 ++ t/data/resource/expected/cancel/020.R.out | 421 ++ t/data/resource/expected/cancel/021.R.out | 428 ++ .../jgfs/elastic/node-1-partial-cancel.json | 1705 ++++++++ .../jgfs/elastic/tiny-partial-cancel.json | 3490 +++++++++++++++++ t/data/resource/jobspecs/cancel/test018.yaml | 23 + t/data/resource/jobspecs/cancel/test019.yaml | 23 + t/data/resource/jobspecs/cancel/test020.yaml | 24 + t/data/resource/jobspecs/cancel/test021.yaml | 24 + t/data/resource/jobspecs/cancel/test022.yaml | 28 + t/data/resource/jobspecs/cancel/test023.yaml | 27 + .../rv1exec/cancel/rank0_cancel_nonexcl.json | 1 + .../resource/rv1exec/cancel/rank1_cancel.json | 1 + .../rv1exec/cancel/rank1_cancel_nonexcl.json | 1 + t/t3008-resource-cancel.t | 79 +- 26 files changed, 7101 insertions(+), 1 deletion(-) create mode 100644 t/data/resource/commands/cancel/cmds03.in create mode 100644 t/data/resource/commands/cancel/cmds04.in create mode 100644 t/data/resource/commands/cancel/cmds05.in create mode 100644 t/data/resource/commands/cancel/cmds06.in create mode 100644 t/data/resource/commands/cancel/cmds07.in create mode 100644 t/data/resource/commands/cancel/cmds08.in create mode 100644 t/data/resource/commands/cancel/cmds09.in create mode 100644 t/data/resource/expected/cancel/015.R.out create mode 100644 t/data/resource/expected/cancel/016.R.out create mode 100644 t/data/resource/expected/cancel/017.R.out create mode 100644 t/data/resource/expected/cancel/018.R.out create mode 100644 t/data/resource/expected/cancel/019.R.out create mode 100644 t/data/resource/expected/cancel/020.R.out create mode 100644 t/data/resource/expected/cancel/021.R.out create mode 100644 t/data/resource/jgfs/elastic/node-1-partial-cancel.json create mode 100644 t/data/resource/jgfs/elastic/tiny-partial-cancel.json create mode 100644 t/data/resource/jobspecs/cancel/test018.yaml create mode 100644 t/data/resource/jobspecs/cancel/test019.yaml create mode 100644 t/data/resource/jobspecs/cancel/test020.yaml create mode 100644 t/data/resource/jobspecs/cancel/test021.yaml create mode 100644 t/data/resource/jobspecs/cancel/test022.yaml create mode 100644 t/data/resource/jobspecs/cancel/test023.yaml create 
mode 100644 t/data/resource/rv1exec/cancel/rank0_cancel_nonexcl.json create mode 100644 t/data/resource/rv1exec/cancel/rank1_cancel.json create mode 100644 t/data/resource/rv1exec/cancel/rank1_cancel_nonexcl.json diff --git a/t/data/resource/commands/cancel/cmds03.in b/t/data/resource/commands/cancel/cmds03.in new file mode 100644 index 000000000..125855dc1 --- /dev/null +++ b/t/data/resource/commands/cancel/cmds03.in @@ -0,0 +1,3 @@ +match allocate_orelse_reserve @TEST_SRCDIR@/data/resource/jobspecs/cancel/test018.yaml +partial-cancel 1 foo @TEST_SRCDIR@/data/resource/rv1exec/cancel/rank1_cancel.json +quit diff --git a/t/data/resource/commands/cancel/cmds04.in b/t/data/resource/commands/cancel/cmds04.in new file mode 100644 index 000000000..2eb1028d5 --- /dev/null +++ b/t/data/resource/commands/cancel/cmds04.in @@ -0,0 +1,8 @@ +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test018.yaml +find sched-now=allocated +partial-cancel 1 rv1exec @TEST_SRCDIR@/data/resource/rv1exec/cancel/rank1_cancel.json +find sched-now=allocated +info 1 +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test019.yaml +find sched-now=allocated +quit diff --git a/t/data/resource/commands/cancel/cmds05.in b/t/data/resource/commands/cancel/cmds05.in new file mode 100644 index 000000000..62b647b2a --- /dev/null +++ b/t/data/resource/commands/cancel/cmds05.in @@ -0,0 +1,4 @@ +match allocate_orelse_reserve @TEST_SRCDIR@/data/resource/jobspecs/cancel/test018.yaml +match allocate_orelse_reserve @TEST_SRCDIR@/data/resource/jobspecs/cancel/test018.yaml +partial-cancel 2 rv1exec @TEST_SRCDIR@/data/resource/rv1exec/cancel/rank1_cancel.json +quit diff --git a/t/data/resource/commands/cancel/cmds06.in b/t/data/resource/commands/cancel/cmds06.in new file mode 100644 index 000000000..bf6981fe5 --- /dev/null +++ b/t/data/resource/commands/cancel/cmds06.in @@ -0,0 +1,9 @@ +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test019.yaml +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test019.yaml +find sched-now=allocated +partial-cancel 2 rv1exec @TEST_SRCDIR@/data/resource/rv1exec/cancel/rank1_cancel.json +find sched-now=allocated +info 2 +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test019.yaml +find sched-now=allocated +quit diff --git a/t/data/resource/commands/cancel/cmds07.in b/t/data/resource/commands/cancel/cmds07.in new file mode 100644 index 000000000..826170f99 --- /dev/null +++ b/t/data/resource/commands/cancel/cmds07.in @@ -0,0 +1,13 @@ +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test020.yaml +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test021.yaml +find sched-now=allocated +partial-cancel 1 rv1exec @TEST_SRCDIR@/data/resource/rv1exec/cancel/rank1_cancel_nonexcl.json +find sched-now=allocated +info 1 +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test019.yaml +find sched-now=allocated +partial-cancel 1 rv1exec @TEST_SRCDIR@/data/resource/rv1exec/cancel/rank0_cancel_nonexcl.json +find sched-now=allocated +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test021.yaml +find sched-now=free +quit diff --git a/t/data/resource/commands/cancel/cmds08.in b/t/data/resource/commands/cancel/cmds08.in new file mode 100644 index 000000000..ae409d8e7 --- /dev/null +++ b/t/data/resource/commands/cancel/cmds08.in @@ -0,0 +1,8 @@ +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test022.yaml +find sched-now=allocated +partial-cancel 1 jgf @TEST_SRCDIR@/data/resource/jgfs/elastic/node-1-partial-cancel.json +find 
sched-now=allocated +info 1 +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test023.yaml +find sched-now=allocated +quit diff --git a/t/data/resource/commands/cancel/cmds09.in b/t/data/resource/commands/cancel/cmds09.in new file mode 100644 index 000000000..5356dd68d --- /dev/null +++ b/t/data/resource/commands/cancel/cmds09.in @@ -0,0 +1,9 @@ +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test023.yaml +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test023.yaml +find sched-now=allocated +partial-cancel 1 jgf @TEST_SRCDIR@/data/resource/jgfs/elastic/node-1-partial-cancel.json +find sched-now=allocated +info 1 +match allocate @TEST_SRCDIR@/data/resource/jobspecs/cancel/test023.yaml +find sched-now=allocated +quit diff --git a/t/data/resource/expected/cancel/015.R.out b/t/data/resource/expected/cancel/015.R.out new file mode 100644 index 000000000..664bf95dc --- /dev/null +++ b/t/data/resource/expected/cancel/015.R.out @@ -0,0 +1,49 @@ + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=1 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= +ERROR: unsupported reader foo diff --git a/t/data/resource/expected/cancel/016.R.out b/t/data/resource/expected/cancel/016.R.out new file mode 100644 index 000000000..e41affc63 --- /dev/null +++ b/t/data/resource/expected/cancel/016.R.out @@ -0,0 +1,192 @@ + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=1 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + 
---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=2 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= diff --git a/t/data/resource/expected/cancel/017.R.out b/t/data/resource/expected/cancel/017.R.out new file mode 100644 index 000000000..e3f55593a --- /dev/null +++ b/t/data/resource/expected/cancel/017.R.out @@ -0,0 +1,97 @@ + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + 
---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=1 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=2 +INFO: RESOURCES=RESERVED +INFO: SCHEDULED AT=3600 +INFO: ============================= +ERROR: reservations not currently supported by partial cancel diff --git a/t/data/resource/expected/cancel/018.R.out b/t/data/resource/expected/cancel/018.R.out new file mode 100644 index 000000000..6970ba3d0 --- /dev/null +++ b/t/data/resource/expected/cancel/018.R.out @@ -0,0 +1,198 @@ + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=1 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=2 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: 
============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=3 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= diff --git a/t/data/resource/expected/cancel/019.R.out b/t/data/resource/expected/cancel/019.R.out new file mode 100644 index 000000000..ce68ed30b --- /dev/null +++ 
b/t/data/resource/expected/cancel/019.R.out @@ -0,0 +1,237 @@ + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ------node0[1:s] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ------node1[1:s] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=1 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:s] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=2 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ------node1[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=3 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + 
---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node0[1:x] + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------core8[1:x] + ---------core9[1:x] + ---------core10[1:x] + ---------core11[1:x] + ---------core12[1:x] + ---------core13[1:x] + ---------core14[1:x] + ---------core15[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ---------gpu2[1:x] + ---------gpu3[1:x] + ------node1[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------core0[1:x] + ---------core1[1:x] + ---------core2[1:x] + ---------core3[1:x] + ---------core4[1:x] + ---------core5[1:x] + ---------core6[1:x] + ---------core7[1:x] + ---------gpu0[1:x] + ---------gpu1[1:x] + ------node0[1:s] + ---cluster0[1:s] +INFO: ============================= +INFO: JOBID=4 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ------node0[1:x] + ---cluster0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=free" +INFO: ============================= diff --git a/t/data/resource/expected/cancel/020.R.out b/t/data/resource/expected/cancel/020.R.out new file mode 100644 index 000000000..3fece9efb --- /dev/null +++ b/t/data/resource/expected/cancel/020.R.out @@ -0,0 +1,421 @@ + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] 
+ ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node0[1:x] + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node1[1:x] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=1 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node0[1:x] + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + 
---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node1[1:x] + ------rack0[1:x] + ---tiny0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node1[1:x] + ------rack0[1:x] + ---tiny0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + 
---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node0[1:x] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=2 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node0[1:x] + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node1[1:x] + ------rack0[1:x] + ---tiny0[1:x] +INFO: 
============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= diff --git a/t/data/resource/expected/cancel/021.R.out b/t/data/resource/expected/cancel/021.R.out new file mode 100644 index 000000000..e2c15513d --- /dev/null +++ b/t/data/resource/expected/cancel/021.R.out @@ -0,0 +1,428 @@ + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node0[1:x] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=1 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node1[1:x] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=2 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core0[1:x] + ---------------core1[1:x] + 
---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node0[1:x] + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node1[1:x] + ------rack0[1:x] + ---tiny0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + 
---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node1[1:x] + ------rack0[1:x] + ---tiny0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node0[1:x] + ------rack0[1:s] + ---tiny0[1:s] +INFO: ============================= +INFO: JOBID=3 +INFO: RESOURCES=ALLOCATED +INFO: SCHEDULED AT=Now +INFO: ============================= + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + 
---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node0[1:x] + ---------------core0[1:x] + ---------------core1[1:x] + ---------------core2[1:x] + ---------------core3[1:x] + ---------------core4[1:x] + ---------------core5[1:x] + ---------------core6[1:x] + ---------------core7[1:x] + ---------------core8[1:x] + ---------------core9[1:x] + ---------------core10[1:x] + ---------------core11[1:x] + ---------------core12[1:x] + ---------------core13[1:x] + ---------------core14[1:x] + ---------------core15[1:x] + ---------------core16[1:x] + ---------------core17[1:x] + ---------------gpu0[1:x] + ---------------memory0[2:x] + ---------------memory1[2:x] + ---------------memory2[2:x] + ---------------memory3[2:x] + ------------socket0[1:x] + ---------------core18[1:x] + ---------------core19[1:x] + ---------------core20[1:x] + ---------------core21[1:x] + ---------------core22[1:x] + ---------------core23[1:x] + ---------------core24[1:x] + ---------------core25[1:x] + ---------------core26[1:x] + ---------------core27[1:x] + ---------------core28[1:x] + ---------------core29[1:x] + ---------------core30[1:x] + ---------------core31[1:x] + ---------------core32[1:x] + ---------------core33[1:x] + ---------------core34[1:x] + ---------------core35[1:x] + ---------------gpu1[1:x] + ---------------memory4[2:x] + ---------------memory5[2:x] + ---------------memory6[2:x] + ---------------memory7[2:x] + ------------socket1[1:x] + ---------node1[1:x] + ------rack0[1:x] + ---tiny0[1:x] +INFO: ============================= +INFO: EXPRESSION="sched-now=allocated" +INFO: ============================= diff --git a/t/data/resource/jgfs/elastic/node-1-partial-cancel.json b/t/data/resource/jgfs/elastic/node-1-partial-cancel.json new file mode 100644 index 000000000..f2a6947b3 --- /dev/null +++ b/t/data/resource/jgfs/elastic/node-1-partial-cancel.json @@ -0,0 +1,1705 @@ +{ + "graph": { + "nodes": [ + { + "id": "2", + "metadata": { + "type": "node", + "basename": "node", + "name": "node0", + "id": 0, + "uniq_id": 2, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0" + } + } + }, + { + "id": "4", + "metadata": { + "type": "socket", + "basename": "socket", + "name": "socket0", + "id": 0, + "uniq_id": 4, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0" + } + } + }, + { + "id": "5", + "metadata": { + "type": "socket", + "basename": "socket", + "name": "socket1", + "id": 1, + "uniq_id": 5, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1" + } + } + }, + { + "id": "8", + "metadata": { + "type": "core", + "basename": "core", + "name": "core0", + "id": 0, + "uniq_id": 8, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core0" + } + } + }, + { + "id": "9", + "metadata": { + "type": "core", + "basename": "core", + "name": "core1", + "id": 1, + "uniq_id": 9, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core1" + } + } + }, + { + "id": "10", + "metadata": { + "type": "core", + 
"basename": "core", + "name": "core2", + "id": 2, + "uniq_id": 10, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core2" + } + } + }, + { + "id": "11", + "metadata": { + "type": "core", + "basename": "core", + "name": "core3", + "id": 3, + "uniq_id": 11, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core3" + } + } + }, + { + "id": "12", + "metadata": { + "type": "core", + "basename": "core", + "name": "core4", + "id": 4, + "uniq_id": 12, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core4" + } + } + }, + { + "id": "13", + "metadata": { + "type": "core", + "basename": "core", + "name": "core5", + "id": 5, + "uniq_id": 13, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core5" + } + } + }, + { + "id": "14", + "metadata": { + "type": "core", + "basename": "core", + "name": "core6", + "id": 6, + "uniq_id": 14, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core6" + } + } + }, + { + "id": "15", + "metadata": { + "type": "core", + "basename": "core", + "name": "core7", + "id": 7, + "uniq_id": 15, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core7" + } + } + }, + { + "id": "16", + "metadata": { + "type": "core", + "basename": "core", + "name": "core8", + "id": 8, + "uniq_id": 16, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core8" + } + } + }, + { + "id": "17", + "metadata": { + "type": "core", + "basename": "core", + "name": "core9", + "id": 9, + "uniq_id": 17, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core9" + } + } + }, + { + "id": "18", + "metadata": { + "type": "core", + "basename": "core", + "name": "core10", + "id": 10, + "uniq_id": 18, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core10" + } + } + }, + { + "id": "19", + "metadata": { + "type": "core", + "basename": "core", + "name": "core11", + "id": 11, + "uniq_id": 19, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core11" + } + } + }, + { + "id": "20", + "metadata": { + "type": "core", + "basename": "core", + "name": "core12", + "id": 12, + "uniq_id": 20, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core12" + } + } + }, + { + "id": "21", + "metadata": { + "type": "core", + "basename": "core", + "name": "core13", + "id": 13, + "uniq_id": 21, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core13" + } + } + }, + { + "id": "22", + "metadata": { + "type": "core", + "basename": "core", + "name": "core14", + "id": 14, + "uniq_id": 22, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core14" + } + } + }, + { + "id": "23", + "metadata": { + "type": "core", + "basename": "core", + "name": "core15", + "id": 15, + "uniq_id": 23, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + 
"containment": "/tiny0/rack0/node0/socket0/core15" + } + } + }, + { + "id": "24", + "metadata": { + "type": "core", + "basename": "core", + "name": "core16", + "id": 16, + "uniq_id": 24, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core16" + } + } + }, + { + "id": "25", + "metadata": { + "type": "core", + "basename": "core", + "name": "core17", + "id": 17, + "uniq_id": 25, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core17" + } + } + }, + { + "id": "26", + "metadata": { + "type": "core", + "basename": "core", + "name": "core18", + "id": 18, + "uniq_id": 26, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core18" + } + } + }, + { + "id": "27", + "metadata": { + "type": "core", + "basename": "core", + "name": "core19", + "id": 19, + "uniq_id": 27, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core19" + } + } + }, + { + "id": "28", + "metadata": { + "type": "core", + "basename": "core", + "name": "core20", + "id": 20, + "uniq_id": 28, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core20" + } + } + }, + { + "id": "29", + "metadata": { + "type": "core", + "basename": "core", + "name": "core21", + "id": 21, + "uniq_id": 29, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core21" + } + } + }, + { + "id": "30", + "metadata": { + "type": "core", + "basename": "core", + "name": "core22", + "id": 22, + "uniq_id": 30, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core22" + } + } + }, + { + "id": "31", + "metadata": { + "type": "core", + "basename": "core", + "name": "core23", + "id": 23, + "uniq_id": 31, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core23" + } + } + }, + { + "id": "32", + "metadata": { + "type": "core", + "basename": "core", + "name": "core24", + "id": 24, + "uniq_id": 32, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core24" + } + } + }, + { + "id": "33", + "metadata": { + "type": "core", + "basename": "core", + "name": "core25", + "id": 25, + "uniq_id": 33, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core25" + } + } + }, + { + "id": "34", + "metadata": { + "type": "core", + "basename": "core", + "name": "core26", + "id": 26, + "uniq_id": 34, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core26" + } + } + }, + { + "id": "35", + "metadata": { + "type": "core", + "basename": "core", + "name": "core27", + "id": 27, + "uniq_id": 35, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core27" + } + } + }, + { + "id": "36", + "metadata": { + "type": "core", + "basename": "core", + "name": "core28", + "id": 28, + "uniq_id": 36, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core28" + } + } + }, + { + "id": "37", + "metadata": { + "type": "core", + 
"basename": "core", + "name": "core29", + "id": 29, + "uniq_id": 37, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core29" + } + } + }, + { + "id": "38", + "metadata": { + "type": "core", + "basename": "core", + "name": "core30", + "id": 30, + "uniq_id": 38, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core30" + } + } + }, + { + "id": "39", + "metadata": { + "type": "core", + "basename": "core", + "name": "core31", + "id": 31, + "uniq_id": 39, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core31" + } + } + }, + { + "id": "40", + "metadata": { + "type": "core", + "basename": "core", + "name": "core32", + "id": 32, + "uniq_id": 40, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core32" + } + } + }, + { + "id": "41", + "metadata": { + "type": "core", + "basename": "core", + "name": "core33", + "id": 33, + "uniq_id": 41, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core33" + } + } + }, + { + "id": "42", + "metadata": { + "type": "core", + "basename": "core", + "name": "core34", + "id": 34, + "uniq_id": 42, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core34" + } + } + }, + { + "id": "43", + "metadata": { + "type": "core", + "basename": "core", + "name": "core35", + "id": 35, + "uniq_id": 43, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core35" + } + } + }, + { + "id": "80", + "metadata": { + "type": "gpu", + "basename": "gpu", + "name": "gpu0", + "id": 0, + "uniq_id": 80, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/gpu0" + } + } + }, + { + "id": "81", + "metadata": { + "type": "gpu", + "basename": "gpu", + "name": "gpu1", + "id": 1, + "uniq_id": 81, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/gpu1" + } + } + }, + { + "id": "84", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory0", + "id": 0, + "uniq_id": 84, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/memory0" + } + } + }, + { + "id": "85", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory1", + "id": 1, + "uniq_id": 85, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/memory1" + } + } + }, + { + "id": "86", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory2", + "id": 2, + "uniq_id": 86, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/memory2" + } + } + }, + { + "id": "87", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory3", + "id": 3, + "uniq_id": 87, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/memory3" + } + } + }, + { + "id": "88", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory4", + "id": 4, + "uniq_id": 88, + "rank": 0, + "exclusive": false, 
+ "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/memory4" + } + } + }, + { + "id": "89", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory5", + "id": 5, + "uniq_id": 89, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/memory5" + } + } + }, + { + "id": "90", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory6", + "id": 6, + "uniq_id": 90, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/memory6" + } + } + }, + { + "id": "91", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory7", + "id": 7, + "uniq_id": 91, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/memory7" + } + } + } + ], + "edges": [ + { + "source": "2", + "target": "4", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "2", + "target": "5", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "2", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "4", + "target": "8", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "9", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "10", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "11", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "12", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "13", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "14", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "15", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "16", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "17", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "18", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "19", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "20", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "21", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "22", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "23", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "24", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "25", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "80", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "84", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "85", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": 
"86", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "87", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "2", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "5", + "target": "26", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "27", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "28", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "29", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "30", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "31", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "32", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "33", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "34", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "35", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "36", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "37", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "38", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "39", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "40", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "41", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "42", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "43", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "81", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "88", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "89", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "90", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "91", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "8", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "9", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "10", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "11", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "12", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "13", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "14", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "15", + "target": "4", + "metadata": { + "name": { + 
"containment": "in" + } + } + }, + { + "source": "16", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "17", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "18", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "19", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "20", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "21", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "22", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "23", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "24", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "25", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "26", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "27", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "28", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "29", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "30", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "31", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "32", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "33", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "34", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "35", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "36", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "37", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "38", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "39", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "40", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "41", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "42", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "43", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "80", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "81", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "84", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "85", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "86", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "87", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "88", + "target": "5", + "metadata": 
{ + "name": { + "containment": "in" + } + } + }, + { + "source": "89", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "90", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "91", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + } + ] + } +} diff --git a/t/data/resource/jgfs/elastic/tiny-partial-cancel.json b/t/data/resource/jgfs/elastic/tiny-partial-cancel.json new file mode 100644 index 000000000..cc9677a52 --- /dev/null +++ b/t/data/resource/jgfs/elastic/tiny-partial-cancel.json @@ -0,0 +1,3490 @@ +{ + "graph": { + "nodes": [ + { + "id": "0", + "metadata": { + "type": "cluster", + "basename": "tiny", + "name": "tiny0", + "id": 0, + "uniq_id": 0, + "rank": -1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0" + } + } + }, + { + "id": "1", + "metadata": { + "type": "rack", + "basename": "rack", + "name": "rack0", + "id": 0, + "uniq_id": 1, + "rank": -1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0" + } + } + }, + { + "id": "2", + "metadata": { + "type": "node", + "basename": "node", + "name": "node0", + "id": 0, + "uniq_id": 2, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0" + } + } + }, + { + "id": "3", + "metadata": { + "type": "node", + "basename": "node", + "name": "node1", + "id": 1, + "uniq_id": 3, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1" + } + } + }, + { + "id": "4", + "metadata": { + "type": "socket", + "basename": "socket", + "name": "socket0", + "id": 0, + "uniq_id": 4, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0" + } + } + }, + { + "id": "5", + "metadata": { + "type": "socket", + "basename": "socket", + "name": "socket1", + "id": 1, + "uniq_id": 5, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1" + } + } + }, + { + "id": "6", + "metadata": { + "type": "socket", + "basename": "socket", + "name": "socket0", + "id": 0, + "uniq_id": 6, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0" + } + } + }, + { + "id": "7", + "metadata": { + "type": "socket", + "basename": "socket", + "name": "socket1", + "id": 1, + "uniq_id": 7, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1" + } + } + }, + { + "id": "8", + "metadata": { + "type": "core", + "basename": "core", + "name": "core0", + "id": 0, + "uniq_id": 8, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core0" + } + } + }, + { + "id": "9", + "metadata": { + "type": "core", + "basename": "core", + "name": "core1", + "id": 1, + "uniq_id": 9, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core1" + } + } + }, + { + "id": "10", + "metadata": { + "type": "core", + "basename": "core", + "name": "core2", + "id": 2, + "uniq_id": 10, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core2" + } + } + }, + { + "id": "11", + "metadata": { + "type": "core", + "basename": "core", + "name": "core3", + "id": 3, + 
"uniq_id": 11, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core3" + } + } + }, + { + "id": "12", + "metadata": { + "type": "core", + "basename": "core", + "name": "core4", + "id": 4, + "uniq_id": 12, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core4" + } + } + }, + { + "id": "13", + "metadata": { + "type": "core", + "basename": "core", + "name": "core5", + "id": 5, + "uniq_id": 13, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core5" + } + } + }, + { + "id": "14", + "metadata": { + "type": "core", + "basename": "core", + "name": "core6", + "id": 6, + "uniq_id": 14, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core6" + } + } + }, + { + "id": "15", + "metadata": { + "type": "core", + "basename": "core", + "name": "core7", + "id": 7, + "uniq_id": 15, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core7" + } + } + }, + { + "id": "16", + "metadata": { + "type": "core", + "basename": "core", + "name": "core8", + "id": 8, + "uniq_id": 16, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core8" + } + } + }, + { + "id": "17", + "metadata": { + "type": "core", + "basename": "core", + "name": "core9", + "id": 9, + "uniq_id": 17, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core9" + } + } + }, + { + "id": "18", + "metadata": { + "type": "core", + "basename": "core", + "name": "core10", + "id": 10, + "uniq_id": 18, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core10" + } + } + }, + { + "id": "19", + "metadata": { + "type": "core", + "basename": "core", + "name": "core11", + "id": 11, + "uniq_id": 19, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core11" + } + } + }, + { + "id": "20", + "metadata": { + "type": "core", + "basename": "core", + "name": "core12", + "id": 12, + "uniq_id": 20, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core12" + } + } + }, + { + "id": "21", + "metadata": { + "type": "core", + "basename": "core", + "name": "core13", + "id": 13, + "uniq_id": 21, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core13" + } + } + }, + { + "id": "22", + "metadata": { + "type": "core", + "basename": "core", + "name": "core14", + "id": 14, + "uniq_id": 22, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core14" + } + } + }, + { + "id": "23", + "metadata": { + "type": "core", + "basename": "core", + "name": "core15", + "id": 15, + "uniq_id": 23, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core15" + } + } + }, + { + "id": "24", + "metadata": { + "type": "core", + "basename": "core", + "name": "core16", + "id": 16, + "uniq_id": 24, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": 
"/tiny0/rack0/node0/socket0/core16" + } + } + }, + { + "id": "25", + "metadata": { + "type": "core", + "basename": "core", + "name": "core17", + "id": 17, + "uniq_id": 25, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/core17" + } + } + }, + { + "id": "26", + "metadata": { + "type": "core", + "basename": "core", + "name": "core18", + "id": 18, + "uniq_id": 26, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core18" + } + } + }, + { + "id": "27", + "metadata": { + "type": "core", + "basename": "core", + "name": "core19", + "id": 19, + "uniq_id": 27, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core19" + } + } + }, + { + "id": "28", + "metadata": { + "type": "core", + "basename": "core", + "name": "core20", + "id": 20, + "uniq_id": 28, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core20" + } + } + }, + { + "id": "29", + "metadata": { + "type": "core", + "basename": "core", + "name": "core21", + "id": 21, + "uniq_id": 29, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core21" + } + } + }, + { + "id": "30", + "metadata": { + "type": "core", + "basename": "core", + "name": "core22", + "id": 22, + "uniq_id": 30, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core22" + } + } + }, + { + "id": "31", + "metadata": { + "type": "core", + "basename": "core", + "name": "core23", + "id": 23, + "uniq_id": 31, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core23" + } + } + }, + { + "id": "32", + "metadata": { + "type": "core", + "basename": "core", + "name": "core24", + "id": 24, + "uniq_id": 32, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core24" + } + } + }, + { + "id": "33", + "metadata": { + "type": "core", + "basename": "core", + "name": "core25", + "id": 25, + "uniq_id": 33, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core25" + } + } + }, + { + "id": "34", + "metadata": { + "type": "core", + "basename": "core", + "name": "core26", + "id": 26, + "uniq_id": 34, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core26" + } + } + }, + { + "id": "35", + "metadata": { + "type": "core", + "basename": "core", + "name": "core27", + "id": 27, + "uniq_id": 35, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core27" + } + } + }, + { + "id": "36", + "metadata": { + "type": "core", + "basename": "core", + "name": "core28", + "id": 28, + "uniq_id": 36, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core28" + } + } + }, + { + "id": "37", + "metadata": { + "type": "core", + "basename": "core", + "name": "core29", + "id": 29, + "uniq_id": 37, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core29" + } + } + }, + { + "id": "38", + "metadata": { + "type": "core", + "basename": "core", 
+ "name": "core30", + "id": 30, + "uniq_id": 38, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core30" + } + } + }, + { + "id": "39", + "metadata": { + "type": "core", + "basename": "core", + "name": "core31", + "id": 31, + "uniq_id": 39, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core31" + } + } + }, + { + "id": "40", + "metadata": { + "type": "core", + "basename": "core", + "name": "core32", + "id": 32, + "uniq_id": 40, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core32" + } + } + }, + { + "id": "41", + "metadata": { + "type": "core", + "basename": "core", + "name": "core33", + "id": 33, + "uniq_id": 41, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core33" + } + } + }, + { + "id": "42", + "metadata": { + "type": "core", + "basename": "core", + "name": "core34", + "id": 34, + "uniq_id": 42, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core34" + } + } + }, + { + "id": "43", + "metadata": { + "type": "core", + "basename": "core", + "name": "core35", + "id": 35, + "uniq_id": 43, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/core35" + } + } + }, + { + "id": "44", + "metadata": { + "type": "core", + "basename": "core", + "name": "core0", + "id": 0, + "uniq_id": 44, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core0" + } + } + }, + { + "id": "45", + "metadata": { + "type": "core", + "basename": "core", + "name": "core1", + "id": 1, + "uniq_id": 45, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core1" + } + } + }, + { + "id": "46", + "metadata": { + "type": "core", + "basename": "core", + "name": "core2", + "id": 2, + "uniq_id": 46, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core2" + } + } + }, + { + "id": "47", + "metadata": { + "type": "core", + "basename": "core", + "name": "core3", + "id": 3, + "uniq_id": 47, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core3" + } + } + }, + { + "id": "48", + "metadata": { + "type": "core", + "basename": "core", + "name": "core4", + "id": 4, + "uniq_id": 48, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core4" + } + } + }, + { + "id": "49", + "metadata": { + "type": "core", + "basename": "core", + "name": "core5", + "id": 5, + "uniq_id": 49, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core5" + } + } + }, + { + "id": "50", + "metadata": { + "type": "core", + "basename": "core", + "name": "core6", + "id": 6, + "uniq_id": 50, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core6" + } + } + }, + { + "id": "51", + "metadata": { + "type": "core", + "basename": "core", + "name": "core7", + "id": 7, + "uniq_id": 51, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": 
"/tiny0/rack0/node1/socket0/core7" + } + } + }, + { + "id": "52", + "metadata": { + "type": "core", + "basename": "core", + "name": "core8", + "id": 8, + "uniq_id": 52, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core8" + } + } + }, + { + "id": "53", + "metadata": { + "type": "core", + "basename": "core", + "name": "core9", + "id": 9, + "uniq_id": 53, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core9" + } + } + }, + { + "id": "54", + "metadata": { + "type": "core", + "basename": "core", + "name": "core10", + "id": 10, + "uniq_id": 54, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core10" + } + } + }, + { + "id": "55", + "metadata": { + "type": "core", + "basename": "core", + "name": "core11", + "id": 11, + "uniq_id": 55, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core11" + } + } + }, + { + "id": "56", + "metadata": { + "type": "core", + "basename": "core", + "name": "core12", + "id": 12, + "uniq_id": 56, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core12" + } + } + }, + { + "id": "57", + "metadata": { + "type": "core", + "basename": "core", + "name": "core13", + "id": 13, + "uniq_id": 57, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core13" + } + } + }, + { + "id": "58", + "metadata": { + "type": "core", + "basename": "core", + "name": "core14", + "id": 14, + "uniq_id": 58, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core14" + } + } + }, + { + "id": "59", + "metadata": { + "type": "core", + "basename": "core", + "name": "core15", + "id": 15, + "uniq_id": 59, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core15" + } + } + }, + { + "id": "60", + "metadata": { + "type": "core", + "basename": "core", + "name": "core16", + "id": 16, + "uniq_id": 60, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core16" + } + } + }, + { + "id": "61", + "metadata": { + "type": "core", + "basename": "core", + "name": "core17", + "id": 17, + "uniq_id": 61, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/core17" + } + } + }, + { + "id": "62", + "metadata": { + "type": "core", + "basename": "core", + "name": "core18", + "id": 18, + "uniq_id": 62, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core18" + } + } + }, + { + "id": "63", + "metadata": { + "type": "core", + "basename": "core", + "name": "core19", + "id": 19, + "uniq_id": 63, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core19" + } + } + }, + { + "id": "64", + "metadata": { + "type": "core", + "basename": "core", + "name": "core20", + "id": 20, + "uniq_id": 64, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core20" + } + } + }, + { + "id": "65", + "metadata": { + "type": "core", + "basename": "core", + 
"name": "core21", + "id": 21, + "uniq_id": 65, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core21" + } + } + }, + { + "id": "66", + "metadata": { + "type": "core", + "basename": "core", + "name": "core22", + "id": 22, + "uniq_id": 66, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core22" + } + } + }, + { + "id": "67", + "metadata": { + "type": "core", + "basename": "core", + "name": "core23", + "id": 23, + "uniq_id": 67, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core23" + } + } + }, + { + "id": "68", + "metadata": { + "type": "core", + "basename": "core", + "name": "core24", + "id": 24, + "uniq_id": 68, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core24" + } + } + }, + { + "id": "69", + "metadata": { + "type": "core", + "basename": "core", + "name": "core25", + "id": 25, + "uniq_id": 69, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core25" + } + } + }, + { + "id": "70", + "metadata": { + "type": "core", + "basename": "core", + "name": "core26", + "id": 26, + "uniq_id": 70, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core26" + } + } + }, + { + "id": "71", + "metadata": { + "type": "core", + "basename": "core", + "name": "core27", + "id": 27, + "uniq_id": 71, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core27" + } + } + }, + { + "id": "72", + "metadata": { + "type": "core", + "basename": "core", + "name": "core28", + "id": 28, + "uniq_id": 72, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core28" + } + } + }, + { + "id": "73", + "metadata": { + "type": "core", + "basename": "core", + "name": "core29", + "id": 29, + "uniq_id": 73, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core29" + } + } + }, + { + "id": "74", + "metadata": { + "type": "core", + "basename": "core", + "name": "core30", + "id": 30, + "uniq_id": 74, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core30" + } + } + }, + { + "id": "75", + "metadata": { + "type": "core", + "basename": "core", + "name": "core31", + "id": 31, + "uniq_id": 75, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core31" + } + } + }, + { + "id": "76", + "metadata": { + "type": "core", + "basename": "core", + "name": "core32", + "id": 32, + "uniq_id": 76, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core32" + } + } + }, + { + "id": "77", + "metadata": { + "type": "core", + "basename": "core", + "name": "core33", + "id": 33, + "uniq_id": 77, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core33" + } + } + }, + { + "id": "78", + "metadata": { + "type": "core", + "basename": "core", + "name": "core34", + "id": 34, + "uniq_id": 78, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { 
+ "containment": "/tiny0/rack0/node1/socket1/core34" + } + } + }, + { + "id": "79", + "metadata": { + "type": "core", + "basename": "core", + "name": "core35", + "id": 35, + "uniq_id": 79, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/core35" + } + } + }, + { + "id": "80", + "metadata": { + "type": "gpu", + "basename": "gpu", + "name": "gpu0", + "id": 0, + "uniq_id": 80, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/gpu0" + } + } + }, + { + "id": "81", + "metadata": { + "type": "gpu", + "basename": "gpu", + "name": "gpu1", + "id": 1, + "uniq_id": 81, + "rank": 0, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/gpu1" + } + } + }, + { + "id": "82", + "metadata": { + "type": "gpu", + "basename": "gpu", + "name": "gpu0", + "id": 0, + "uniq_id": 82, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/gpu0" + } + } + }, + { + "id": "83", + "metadata": { + "type": "gpu", + "basename": "gpu", + "name": "gpu1", + "id": 1, + "uniq_id": 83, + "rank": 1, + "exclusive": false, + "unit": "", + "size": 1, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/gpu1" + } + } + }, + { + "id": "84", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory0", + "id": 0, + "uniq_id": 84, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/memory0" + } + } + }, + { + "id": "85", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory1", + "id": 1, + "uniq_id": 85, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/memory1" + } + } + }, + { + "id": "86", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory2", + "id": 2, + "uniq_id": 86, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/memory2" + } + } + }, + { + "id": "87", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory3", + "id": 3, + "uniq_id": 87, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket0/memory3" + } + } + }, + { + "id": "88", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory4", + "id": 4, + "uniq_id": 88, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/memory4" + } + } + }, + { + "id": "89", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory5", + "id": 5, + "uniq_id": 89, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/memory5" + } + } + }, + { + "id": "90", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory6", + "id": 6, + "uniq_id": 90, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/memory6" + } + } + }, + { + "id": "91", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory7", + "id": 7, + "uniq_id": 91, + "rank": 0, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node0/socket1/memory7" + } + } + }, + { + "id": "92", + 
"metadata": { + "type": "memory", + "basename": "memory", + "name": "memory0", + "id": 0, + "uniq_id": 92, + "rank": 1, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/memory0" + } + } + }, + { + "id": "93", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory1", + "id": 1, + "uniq_id": 93, + "rank": 1, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/memory1" + } + } + }, + { + "id": "94", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory2", + "id": 2, + "uniq_id": 94, + "rank": 1, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/memory2" + } + } + }, + { + "id": "95", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory3", + "id": 3, + "uniq_id": 95, + "rank": 1, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node1/socket0/memory3" + } + } + }, + { + "id": "96", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory4", + "id": 4, + "uniq_id": 96, + "rank": 1, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/memory4" + } + } + }, + { + "id": "97", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory5", + "id": 5, + "uniq_id": 97, + "rank": 1, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/memory5" + } + } + }, + { + "id": "98", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory6", + "id": 6, + "uniq_id": 98, + "rank": 1, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/memory6" + } + } + }, + { + "id": "99", + "metadata": { + "type": "memory", + "basename": "memory", + "name": "memory7", + "id": 7, + "uniq_id": 99, + "rank": 1, + "exclusive": false, + "unit": "GB", + "size": 2, + "paths": { + "containment": "/tiny0/rack0/node1/socket1/memory7" + } + } + } + ], + "edges": [ + { + "source": "0", + "target": "1", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "1", + "target": "0", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "1", + "target": "2", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "1", + "target": "3", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "2", + "target": "1", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "2", + "target": "4", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "2", + "target": "5", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "3", + "target": "1", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "3", + "target": "6", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "3", + "target": "7", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "2", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "4", + "target": "8", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "9", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + 
"target": "10", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "11", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "12", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "13", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "14", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "15", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "16", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "17", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "18", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "19", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "20", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "21", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "22", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "23", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "24", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "25", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "80", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "84", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "85", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "86", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "4", + "target": "87", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "2", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "5", + "target": "26", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "27", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "28", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "29", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "30", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "31", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "32", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "33", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "34", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "35", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "36", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", 
+ "target": "37", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "38", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "39", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "40", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "41", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "42", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "43", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "81", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "88", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "89", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "90", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "5", + "target": "91", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "3", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "6", + "target": "44", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "45", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "46", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "47", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "48", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "49", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "50", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "51", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "52", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "53", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "54", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "55", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "56", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "57", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "58", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "59", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "60", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "61", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "82", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "92", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": 
"6", + "target": "93", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "94", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "6", + "target": "95", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "3", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "7", + "target": "62", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "63", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "64", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "65", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "66", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "67", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "68", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "69", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "70", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "71", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "72", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "73", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "74", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "75", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "76", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "77", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "78", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "79", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "83", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "96", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "97", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "98", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "7", + "target": "99", + "metadata": { + "name": { + "containment": "contains" + } + } + }, + { + "source": "8", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "9", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "10", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "11", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "12", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "13", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "14", + "target": "4", + 
"metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "15", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "16", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "17", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "18", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "19", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "20", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "21", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "22", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "23", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "24", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "25", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "26", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "27", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "28", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "29", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "30", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "31", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "32", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "33", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "34", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "35", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "36", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "37", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "38", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "39", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "40", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "41", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "42", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "43", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "44", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "45", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "46", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "47", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "48", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "49", + 
"target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "50", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "51", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "52", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "53", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "54", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "55", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "56", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "57", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "58", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "59", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "60", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "61", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "62", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "63", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "64", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "65", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "66", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "67", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "68", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "69", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "70", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "71", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "72", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "73", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "74", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "75", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "76", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "77", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "78", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "79", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "80", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "81", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "82", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "83", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + 
"source": "84", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "85", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "86", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "87", + "target": "4", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "88", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "89", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "90", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "91", + "target": "5", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "92", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "93", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "94", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "95", + "target": "6", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "96", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "97", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "98", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + }, + { + "source": "99", + "target": "7", + "metadata": { + "name": { + "containment": "in" + } + } + } + ] + } +} diff --git a/t/data/resource/jobspecs/cancel/test018.yaml b/t/data/resource/jobspecs/cancel/test018.yaml new file mode 100644 index 000000000..1084f8dfc --- /dev/null +++ b/t/data/resource/jobspecs/cancel/test018.yaml @@ -0,0 +1,23 @@ +version: 9999 +resources: + - type: slot + count: 2 + label: default + with: + - type: node + count: 1 + with: + - type: core + count: 16 + - type: gpu + count: 4 +# a comment +attributes: + system: + duration: 3600 +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 + diff --git a/t/data/resource/jobspecs/cancel/test019.yaml b/t/data/resource/jobspecs/cancel/test019.yaml new file mode 100644 index 000000000..cd09981ad --- /dev/null +++ b/t/data/resource/jobspecs/cancel/test019.yaml @@ -0,0 +1,23 @@ +version: 9999 +resources: + - type: slot + count: 1 + label: default + with: + - type: node + count: 1 + with: + - type: core + count: 16 + - type: gpu + count: 4 +# a comment +attributes: + system: + duration: 3600 +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 + diff --git a/t/data/resource/jobspecs/cancel/test020.yaml b/t/data/resource/jobspecs/cancel/test020.yaml new file mode 100644 index 000000000..5478faf16 --- /dev/null +++ b/t/data/resource/jobspecs/cancel/test020.yaml @@ -0,0 +1,24 @@ +version: 9999 +resources: + - type: node + count: 2 + exclusive: false + with: + - type: slot + count: 1 + label: default + with: + - type: core + count: 8 + - type: gpu + count: 2 +# a comment +attributes: + system: + duration: 3600 +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 + diff --git a/t/data/resource/jobspecs/cancel/test021.yaml b/t/data/resource/jobspecs/cancel/test021.yaml new file mode 100644 index 000000000..2b6876790 --- /dev/null +++ b/t/data/resource/jobspecs/cancel/test021.yaml @@ -0,0 +1,24 @@ +version: 9999 +resources: + - type: node + count: 1 + exclusive: false + with: + - type: 
slot + count: 1 + label: default + with: + - type: core + count: 8 + - type: gpu + count: 2 +# a comment +attributes: + system: + duration: 3600 +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 + diff --git a/t/data/resource/jobspecs/cancel/test022.yaml b/t/data/resource/jobspecs/cancel/test022.yaml new file mode 100644 index 000000000..dee6888da --- /dev/null +++ b/t/data/resource/jobspecs/cancel/test022.yaml @@ -0,0 +1,28 @@ +version: 9999 +resources: + - type: slot + count: 2 + label: default + with: + - type: node + count: 1 + with: + - type: socket + count: 2 + with: + - type: core + count: 18 + - type: gpu + count: 1 + - type: memory + count: 8 +# a comment +attributes: + system: + duration: 3600 +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 + diff --git a/t/data/resource/jobspecs/cancel/test023.yaml b/t/data/resource/jobspecs/cancel/test023.yaml new file mode 100644 index 000000000..23d2c7d08 --- /dev/null +++ b/t/data/resource/jobspecs/cancel/test023.yaml @@ -0,0 +1,27 @@ +version: 9999 +resources: + - type: slot + count: 1 + label: default + with: + - type: node + count: 1 + with: + - type: socket + count: 2 + with: + - type: core + count: 18 + - type: gpu + count: 1 + - type: memory + count: 8 +# a comment +attributes: + system: + duration: 3600 +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 \ No newline at end of file diff --git a/t/data/resource/rv1exec/cancel/rank0_cancel_nonexcl.json b/t/data/resource/rv1exec/cancel/rank0_cancel_nonexcl.json new file mode 100644 index 000000000..2afe800bc --- /dev/null +++ b/t/data/resource/rv1exec/cancel/rank0_cancel_nonexcl.json @@ -0,0 +1 @@ +{"version": 1, "execution": {"R_lite": [{"rank": "0", "children": {"core": "0-7", "gpu": "0-1"}}], "nodelist": ["node0"], "starttime": 0, "expiration": 1000000}} diff --git a/t/data/resource/rv1exec/cancel/rank1_cancel.json b/t/data/resource/rv1exec/cancel/rank1_cancel.json new file mode 100644 index 000000000..5e5db17ea --- /dev/null +++ b/t/data/resource/rv1exec/cancel/rank1_cancel.json @@ -0,0 +1 @@ +{"version": 1, "execution": {"R_lite": [{"rank": "1", "children": {"core": "0-15", "gpu": "0-3"}}], "nodelist": ["node1"], "starttime": 0, "expiration": 1000000}} diff --git a/t/data/resource/rv1exec/cancel/rank1_cancel_nonexcl.json b/t/data/resource/rv1exec/cancel/rank1_cancel_nonexcl.json new file mode 100644 index 000000000..24a636185 --- /dev/null +++ b/t/data/resource/rv1exec/cancel/rank1_cancel_nonexcl.json @@ -0,0 +1 @@ +{"version": 1, "execution": {"R_lite": [{"rank": "1", "children": {"core": "0-7", "gpu": "0-1"}}], "nodelist": ["node1"], "starttime": 0, "expiration": 1000000}} diff --git a/t/t3008-resource-cancel.t b/t/t3008-resource-cancel.t index 1c89e9b70..ef4ca6f0a 100755 --- a/t/t3008-resource-cancel.t +++ b/t/t3008-resource-cancel.t @@ -1,12 +1,14 @@ #!/bin/sh -test_description='Test reservations of jobs of varying geometries and durations' +test_description='Test cancellation of jobs of varying geometries and durations' . 
$(dirname $0)/sharness.sh cmd_dir="${SHARNESS_TEST_SRCDIR}/data/resource/commands/cancel" exp_dir="${SHARNESS_TEST_SRCDIR}/data/resource/expected/cancel" grugs="${SHARNESS_TEST_SRCDIR}/data/resource/grugs/resv_test.graphml" +rv1s="${SHARNESS_TEST_SRCDIR}/data/resource/rv1exec/tiny_rv1exec.json" +jgfs="${SHARNESS_TEST_SRCDIR}/data/resource/jgfs/elastic/tiny-partial-cancel.json" query="../../resource/utilities/resource-query" # @@ -117,4 +119,79 @@ test_expect_success "${test014_desc}" ' test_cmp 014.R.out ${exp_dir}/014.R.out ' +# Partial cancel/release -- Use low node policy +# Tests to ensure correctness of partial cancel/release behavior + +cmds015="${cmd_dir}/cmds03.in" +test015_desc="test reader file option" +test_expect_success "${test015_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds015} > cmds015 && + ${query} -f rv1exec -L ${rv1s} -S CA -P low -t 015.R.out < cmds015 2>> 015.R.out && + test_cmp 015.R.out ${exp_dir}/015.R.out +' + +cmds016="${cmd_dir}/cmds04.in" +test016_desc="test partial cancel and reallocation of one rank" +test_expect_success "${test016_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds016} > cmds016 && + ${query} -f rv1exec -L ${rv1s} -S CA -P low -t 016.R.out < cmds016 && + test_cmp 016.R.out ${exp_dir}/016.R.out +' + +cmds017="${cmd_dir}/cmds05.in" +test017_desc="check for unsupported partial cancel of reservation" +test_expect_success "${test017_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds017} > cmds017 && + ${query} -f rv1exec -L ${rv1s} -S CA -P low -t 017.R.out < cmds017 2>> 017.R.out && + test_cmp 017.R.out ${exp_dir}/017.R.out +' + +cmds018="${cmd_dir}/cmds06.in" +test018_desc="partial cancel of full allocation is the same as full cancel" +test_expect_success "${test018_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds018} > cmds018 && + ${query} -f rv1exec -L ${rv1s} -S CA -P low -t 018.R.out < cmds018 && + test_cmp 018.R.out ${exp_dir}/018.R.out +' + +cmds019="${cmd_dir}/cmds06.in" +test019_desc="partial cancel of full allocation is the same as full cancel with all pruning filters set" +test_expect_success "${test019_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds019} > cmds019 && + ${query} -f rv1exec -L ${rv1s} -S CA -P low --prune-filters=ALL:core,ALL:node,ALL:gpu -t 019.R.out < cmds019 && + test_cmp 019.R.out ${exp_dir}/018.R.out +' + +cmds020="${cmd_dir}/cmds07.in" +test020_desc="test partial cancel and reallocation of non-exclusive jobs" +test_expect_success "${test020_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds020} > cmds020 && + ${query} -f rv1exec -L ${rv1s} -S CA -P low -t 019.R.out < cmds020 && + test_cmp 019.R.out ${exp_dir}/019.R.out +' + +cmds021="${cmd_dir}/cmds08.in" +test021_desc="test partial cancel and reallocation of one rank; jgf" +test_expect_success "${test021_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds021} > cmds021 && + ${query} -f jgf -L ${jgfs} -S CA -P low -t 020.R.out < cmds021 && + test_cmp 020.R.out ${exp_dir}/020.R.out +' + +cmds022="${cmd_dir}/cmds09.in" +test022_desc="partial cancel of full allocation is the same as full cancel; jgf" +test_expect_success "${test022_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds022} > cmds022 && + ${query} -f jgf -L ${jgfs} -S CA -P low -t 021.R.out < cmds022 && + test_cmp 021.R.out ${exp_dir}/021.R.out +' + +cmds023="${cmd_dir}/cmds09.in" +test023_desc="partial cancel of full allocation is the same as full cancel with all pruning 
filters set; jgf" +test_expect_success "${test023_desc}" ' + sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds023} > cmds023 && + ${query} -f jgf -L ${jgfs} -S CA -P low --prune-filters=ALL:core,ALL:node,ALL:gpu -t 022.R.out < cmds023 && + test_cmp 022.R.out ${exp_dir}/021.R.out +' + test_done From b6d44c07881e26a364af35f2a13e814d66386d0e Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Fri, 28 Jun 2024 20:05:15 -0700 Subject: [PATCH 15/24] reapi: add partial cancel C bindings Problem: partial cancel functionality isn't available in the C REAPI. Add the interface functions for module and cli. --- resource/reapi/bindings/c/reapi_cli.cpp | 12 ++++++++++++ resource/reapi/bindings/c/reapi_cli.h | 14 ++++++++++++++ resource/reapi/bindings/c/reapi_module.cpp | 12 ++++++++++++ resource/reapi/bindings/c/reapi_module.h | 14 ++++++++++++++ 4 files changed, 52 insertions(+) diff --git a/resource/reapi/bindings/c/reapi_cli.cpp b/resource/reapi/bindings/c/reapi_cli.cpp index c36552645..09d99fde0 100644 --- a/resource/reapi/bindings/c/reapi_cli.cpp +++ b/resource/reapi/bindings/c/reapi_cli.cpp @@ -193,6 +193,18 @@ extern "C" int reapi_cli_cancel (reapi_cli_ctx_t *ctx, return reapi_cli_t::cancel (ctx->rqt, jobid, noent_ok); } +extern "C" int reapi_cli_partial_cancel (reapi_cli_ctx_t *ctx, + const uint64_t jobid, + const char *R, bool noent_ok, + bool *full_removal) +{ + if (!ctx || !ctx->rqt || !R) { + errno = EINVAL; + return -1; + } + return reapi_cli_t::cancel (ctx->rqt, jobid, R, noent_ok, *full_removal); +} + extern "C" int reapi_cli_info (reapi_cli_ctx_t *ctx, const uint64_t jobid, char **mode, bool *reserved, int64_t *at, double *ov) diff --git a/resource/reapi/bindings/c/reapi_cli.h b/resource/reapi/bindings/c/reapi_cli.h index f0a29a88a..a41ca3103 100644 --- a/resource/reapi/bindings/c/reapi_cli.h +++ b/resource/reapi/bindings/c/reapi_cli.h @@ -141,6 +141,20 @@ int reapi_cli_update_allocate (reapi_cli_ctx_t *ctx, int reapi_cli_cancel (reapi_cli_ctx_t *ctx, const uint64_t jobid, bool noent_ok); +/*! Cancel the allocation or reservation corresponding to jobid. + * + * \param ctx reapi_module_ctx_t context object + * \param jobid jobid of the uint64_t type. + * \param R R string to remove + * \param noent_ok don't return an error on nonexistent jobid + * \param full_removal don't return an error on nonexistent jobid + * \return 0 on success; -1 on error. + */ +int reapi_cli_partial_cancel (reapi_cli_ctx_t *ctx, + const uint64_t jobid, + const char *R, + bool noent_ok, bool *full_removal); + /*! Get the information on the allocation or reservation corresponding * to jobid. 
* diff --git a/resource/reapi/bindings/c/reapi_module.cpp b/resource/reapi/bindings/c/reapi_module.cpp index 4c23e490c..ccfb5c4b4 100644 --- a/resource/reapi/bindings/c/reapi_module.cpp +++ b/resource/reapi/bindings/c/reapi_module.cpp @@ -137,6 +137,18 @@ extern "C" int reapi_module_cancel (reapi_module_ctx_t *ctx, return reapi_module_t::cancel (ctx->h, jobid, noent_ok); } +extern "C" int reapi_module_partial_cancel (reapi_module_ctx_t *ctx, + const uint64_t jobid, + const char *R, bool noent_ok, + bool &full_removal) +{ + if (!ctx || !ctx->h || !R) { + errno = EINVAL; + return -1; + } + return reapi_module_t::cancel (ctx->h, jobid, R, noent_ok, full_removal); +} + extern "C" int reapi_module_info (reapi_module_ctx_t *ctx, const uint64_t jobid, bool *reserved, int64_t *at, double *ov) { diff --git a/resource/reapi/bindings/c/reapi_module.h b/resource/reapi/bindings/c/reapi_module.h index 3592c6d80..2e6737354 100644 --- a/resource/reapi/bindings/c/reapi_module.h +++ b/resource/reapi/bindings/c/reapi_module.h @@ -129,6 +129,20 @@ int reapi_module_update_allocate (reapi_module_ctx_t *ctx, int reapi_module_cancel (reapi_module_ctx_t *ctx, const uint64_t jobid, bool noent_ok); +/*! Cancel the allocation or reservation corresponding to jobid. + * + * \param ctx reapi_module_ctx_t context object + * \param jobid jobid of the uint64_t type. + * \param R R string to remove + * \param noent_ok don't return an error on nonexistent jobid + * \param full_removal don't return an error on nonexistent jobid + * \return 0 on success; -1 on error. + */ +int reapi_module_partial_cancel (reapi_module_ctx_t *ctx, + const uint64_t jobid, + const char *R, + bool noent_ok, bool &full_removal); + /*! Get the information on the allocation or reservation corresponding * to jobid. * From d3b0e4b597cdd670d80751d95117b61c627a3c45 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Fri, 28 Jun 2024 20:06:03 -0700 Subject: [PATCH 16/24] reapi: add partial cancel C++ bindings and implementation Problem: there is no partial cancel functionality available in the C++ REAPI for the CLI and module. Add the interface functions and the implementations for C++. --- resource/reapi/bindings/c++/reapi.hpp | 14 ++++ resource/reapi/bindings/c++/reapi_cli.hpp | 4 ++ .../reapi/bindings/c++/reapi_cli_impl.hpp | 66 +++++++++++++++++++ resource/reapi/bindings/c++/reapi_module.hpp | 2 + .../reapi/bindings/c++/reapi_module_impl.hpp | 40 ++++++++++- 5 files changed, 125 insertions(+), 1 deletion(-) diff --git a/resource/reapi/bindings/c++/reapi.hpp b/resource/reapi/bindings/c++/reapi.hpp index eea99932c..7c5823169 100644 --- a/resource/reapi/bindings/c++/reapi.hpp +++ b/resource/reapi/bindings/c++/reapi.hpp @@ -196,6 +196,20 @@ class reapi_t { return -1; } + /*! Cancel the allocation or reservation corresponding to jobid. + * + * \param ctx reapi_module_ctx_t context object + * \param jobid jobid of the uint64_t type. + * \param R R string to remove + * \param noent_ok don't return an error on nonexistent jobid + * \param full_removal bool indictating whether the job is fully canceled + * \return 0 on success; -1 on error. + */ + static int cancel (void *h, const uint64_t jobid, + const char *R, bool noent_ok, bool &full_removal) + { + return -1; + } /*! Get the information on the allocation or reservation corresponding * to jobid. 
diff --git a/resource/reapi/bindings/c++/reapi_cli.hpp b/resource/reapi/bindings/c++/reapi_cli.hpp index 8155c2b58..c81252e8b 100644 --- a/resource/reapi/bindings/c++/reapi_cli.hpp +++ b/resource/reapi/bindings/c++/reapi_cli.hpp @@ -86,6 +86,8 @@ class resource_query_t { void set_job (const uint64_t jobid, const std::shared_ptr &job); int remove_job (const uint64_t jobid); + int remove_job (const uint64_t jobid, const std::string &R, + bool &full_removal); void incr_job_counter (); /* Run the traverser to match the jobspec */ @@ -139,6 +141,8 @@ class reapi_cli_t : public reapi_t { const std::string &R, int64_t &at, double &ov, std::string &R_out); static int cancel (void *h, const uint64_t jobid, bool noent_ok); + static int cancel (void *h, const uint64_t jobid, const std::string &R, + bool noent_ok, bool &full_removal); static int find (void *h, std::string criteria, json_t *&o ); static int info (void *h, const uint64_t jobid, std::string &mode, bool &reserved, int64_t &at, double &ov); diff --git a/resource/reapi/bindings/c++/reapi_cli_impl.hpp b/resource/reapi/bindings/c++/reapi_cli_impl.hpp index be6f91435..0a5c860c5 100644 --- a/resource/reapi/bindings/c++/reapi_cli_impl.hpp +++ b/resource/reapi/bindings/c++/reapi_cli_impl.hpp @@ -203,6 +203,35 @@ int reapi_cli_t::cancel (void *h, const uint64_t jobid, bool noent_ok) return rc; } +int reapi_cli_t::cancel (void *h, const uint64_t jobid, const std::string &R, + bool noent_ok, bool &full_removal) +{ + resource_query_t *rq = static_cast (h); + int rc = -1; + + if (rq->allocation_exists (jobid)) { + if ( (rc = rq->remove_job (jobid, R, full_removal)) == 0) { + if (full_removal) + rq->erase_allocation (jobid); + } + } else { + m_err_msg += __FUNCTION__; + m_err_msg += ": WARNING: can't find allocation for jobid: " + + std::to_string (jobid) + "\n"; + rc = 0; + goto out; + } + + if (rc != 0) { + m_err_msg += __FUNCTION__; + m_err_msg += ": ERROR: error encountered while removing job " + + std::to_string (jobid) + "\n"; + } + +out: + return rc; +} + int reapi_cli_t::find (void *h, std::string criteria, json_t *&o ) { @@ -687,6 +716,43 @@ int resource_query_t::remove_job (const uint64_t jobid) return rc; } +int resource_query_t::remove_job (const uint64_t jobid, const std::string &R, + bool &full_removal) +{ + int rc = -1; + std::shared_ptr reader; + + if (jobid > (uint64_t) std::numeric_limits::max ()) { + errno = EOVERFLOW; + return rc; + } + if (R == "") { + errno = EINVAL; + return rc; + } + if ( (reader = create_resource_reader (params.load_format)) == nullptr) { + m_err_msg = __FUNCTION__; + m_err_msg += ": ERROR: can't create reader\n"; + return rc; + } + + rc = traverser->remove (R, reader, static_cast (jobid), + full_removal); + if (rc == 0) { + if (full_removal) { + auto job_info_it = jobs.find (jobid); + if (job_info_it != jobs.end ()) { + job_info_it->second->state = job_lifecycle_t::CANCELED; + } + } + } else { + m_err_msg += traverser->err_message (); + traverser->clear_err_message (); + } + + return rc; +} + void resource_query_t::incr_job_counter () { jobid_counter++; diff --git a/resource/reapi/bindings/c++/reapi_module.hpp b/resource/reapi/bindings/c++/reapi_module.hpp index 33c2c8be6..f3b14504e 100644 --- a/resource/reapi/bindings/c++/reapi_module.hpp +++ b/resource/reapi/bindings/c++/reapi_module.hpp @@ -47,6 +47,8 @@ class reapi_module_t : public reapi_t { const std::string &R, int64_t &at, double &ov, std::string &R_out); static int cancel (void *h, const uint64_t jobid, bool noent_ok); + static int cancel (void *h, 
const uint64_t jobid, const std::string &R, + bool noent_ok, bool &full_removal); static int info (void *h, const uint64_t jobid, bool &reserved, int64_t &at, double &ov); static int stat (void *h, int64_t &V, int64_t &E,int64_t &J, diff --git a/resource/reapi/bindings/c++/reapi_module_impl.hpp b/resource/reapi/bindings/c++/reapi_module_impl.hpp index b802fd257..20308e3ad 100644 --- a/resource/reapi/bindings/c++/reapi_module_impl.hpp +++ b/resource/reapi/bindings/c++/reapi_module_impl.hpp @@ -222,12 +222,50 @@ int reapi_module_t::cancel (void *h, const uint64_t jobid, bool noent_ok) if (noent_ok && errno == ENOENT) { errno = saved_errno; rc = 0; - } + } + goto out; + } + rc = 0; + +out: + flux_future_destroy (f); + return rc; +} + +int reapi_module_t::cancel (void *h, const uint64_t jobid, + const std::string &R, bool noent_ok, + bool &full_removal) +{ + int rc = -1; + flux_t *fh = (flux_t *)h; + flux_future_t *f = NULL; + int saved_errno; + int ret_removal = 0; + + if (!fh || R == "" || jobid > INT64_MAX) { + errno = EINVAL; + goto out; + } + if (!(f = flux_rpc_pack (fh, "sched-fluxion-resource.partial-cancel", + FLUX_NODEID_ANY, 0, + "{s:I s:s}", + "jobid", (const int64_t)jobid, + "R", R.c_str ()))) { + goto out; + } + saved_errno = errno; + if ( (rc = flux_rpc_get_unpack (f, "{s:i}", + "full-removal", &ret_removal)) < 0) { + if (noent_ok && (errno == ENOENT)) { + errno = saved_errno; + rc = 0; + } goto out; } rc = 0; out: + full_removal = (ret_removal != 0); flux_future_destroy (f); return rc; } From 06dfb84c5264aa1d3f6a9cadec890ff7841730ab Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Fri, 28 Jun 2024 20:46:28 -0700 Subject: [PATCH 17/24] resource match: add partial cancel functionality Problem: the resource module doesn't have support for partial cancellation. Add the callback for partial cancellation, and the logic to distinguish between a single partial cancellation and a sequence of partial cancellations that results in a full cancellation. 
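
For reference, the request and response keys handled by the new callback are
"jobid"/"R" and "full-removal". The sketch below (an illustrative helper, not
part of this patch) shows the intended sequence semantics from the client
side: successive partial-cancel RPCs for the same jobid return
full-removal = 0 until the request that frees the job's last resources.

    #include <flux/core.h>
    #include <string>
    #include <vector>

    // Sketch only: release a job piece by piece via the new RPC.
    // release_in_pieces () and R_pieces are illustrative names.
    static int release_in_pieces (flux_t *h, int64_t jobid,
                                  const std::vector<std::string> &R_pieces)
    {
        for (const auto &R : R_pieces) {
            int full_removal = 0;
            flux_future_t *f = flux_rpc_pack (h,
                                    "sched-fluxion-resource.partial-cancel",
                                    FLUX_NODEID_ANY, 0,
                                    "{s:I s:s}",
                                    "jobid", jobid,
                                    "R", R.c_str ());
            if (!f)
                return -1;
            if (flux_rpc_get_unpack (f, "{s:i}",
                                     "full-removal", &full_removal) < 0) {
                flux_future_destroy (f);
                return -1;
            }
            flux_future_destroy (f);
            if (full_removal)
                break;  // allocation fully removed; nothing left to free
        }
        return 0;
    }
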
--- resource/modules/resource_match.cpp | 73 +++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 4 deletions(-) diff --git a/resource/modules/resource_match.cpp b/resource/modules/resource_match.cpp index d8ce6ba76..c9d2b3f27 100644 --- a/resource/modules/resource_match.cpp +++ b/resource/modules/resource_match.cpp @@ -220,6 +220,9 @@ static void update_request_cb (flux_t *h, flux_msg_handler_t *w, static void cancel_request_cb (flux_t *h, flux_msg_handler_t *w, const flux_msg_t *msg, void *arg); +static void partial_cancel_request_cb (flux_t *h, flux_msg_handler_t *w, + const flux_msg_t *msg, void *arg); + static void info_request_cb (flux_t *h, flux_msg_handler_t *w, const flux_msg_t *msg, void *arg); @@ -271,6 +274,8 @@ static const struct flux_msg_handler_spec htab[] = { "sched-fluxion-resource.update", update_request_cb, 0}, { FLUX_MSGTYPE_REQUEST, "sched-fluxion-resource.cancel", cancel_request_cb, 0 }, + { FLUX_MSGTYPE_REQUEST, + "sched-fluxion-resource.partial-cancel", partial_cancel_request_cb, 0 }, { FLUX_MSGTYPE_REQUEST, "sched-fluxion-resource.info", info_request_cb, 0 }, { FLUX_MSGTYPE_REQUEST, @@ -1846,12 +1851,27 @@ static void update_request_cb (flux_t *h, flux_msg_handler_t *w, flux_log_error (h, "%s: flux_respond_error", __FUNCTION__); } -static int run_remove (std::shared_ptr &ctx, int64_t jobid) +static int run_remove (std::shared_ptr &ctx, int64_t jobid, + const char *R, bool part_cancel, bool &full_removal) { int rc = -1; dfu_traverser_t &tr = *(ctx->traverser); - if ((rc = tr.remove (jobid)) < 0) { + if (part_cancel) { + // RV1exec only reader supported in production currently + std::shared_ptr reader; + if ( (reader = create_resource_reader ("rv1exec")) == nullptr) { + rc = -1; + flux_log (ctx->h, LOG_ERR, "%s: creating rv1exec reader (id=%jd)", + __FUNCTION__, static_cast (jobid)); + goto out; + } + rc = tr.remove (R, reader, jobid, full_removal); + } else { + rc = tr.remove (jobid); + full_removal = true; + } + if (rc != 0) { if (is_existent_jobid (ctx, jobid)) { // When this condition arises, we will be less likely // to be able to reuse this jobid. 
Having the errored job @@ -1865,7 +1885,7 @@ static int run_remove (std::shared_ptr &ctx, int64_t jobid) } goto out; } - if (is_existent_jobid (ctx, jobid)) + if (full_removal && is_existent_jobid (ctx, jobid)) ctx->jobs.erase (jobid); rc = 0; @@ -2013,6 +2033,8 @@ static void cancel_request_cb (flux_t *h, flux_msg_handler_t *w, { std::shared_ptr ctx = getctx ((flux_t *)arg); int64_t jobid = -1; + char *R = NULL; + bool full_removal = true; if (flux_request_unpack (msg, NULL, "{s:I}", "jobid", &jobid) < 0) goto error; @@ -2027,7 +2049,7 @@ static void cancel_request_cb (flux_t *h, flux_msg_handler_t *w, goto error; } - if (run_remove (ctx, jobid) < 0) { + if (run_remove (ctx, jobid, R, false, full_removal) < 0) { flux_log_error (h, "%s: remove fails due to match error (id=%jd)", __FUNCTION__, (intmax_t)jobid); goto error; @@ -2042,6 +2064,49 @@ static void cancel_request_cb (flux_t *h, flux_msg_handler_t *w, flux_log_error (h, "%s: flux_respond_error", __FUNCTION__); } +static void partial_cancel_request_cb (flux_t *h, flux_msg_handler_t *w, + const flux_msg_t *msg, void *arg) +{ + std::shared_ptr ctx = getctx ((flux_t *)arg); + int64_t jobid = -1; + char *R = NULL; + decltype(ctx->allocations)::iterator jobid_it; + bool full_removal = false; + int int_full_removal = 0; + + if (flux_request_unpack (msg, NULL, "{s:I s:s}", + "jobid", &jobid, + "R", &R) < 0) + goto error; + + jobid_it = ctx->allocations.find (jobid); + if (jobid_it == ctx->allocations.end ()) { + errno = ENOENT; + flux_log (h, LOG_DEBUG, "%s: job (id=%jd) not found in allocations", + __FUNCTION__, (intmax_t)jobid); + goto error; + } + + if (run_remove (ctx, jobid, R, true, full_removal) < 0) { + flux_log_error (h, "%s: remove fails due to match error (id=%jd)", + __FUNCTION__, (intmax_t)jobid); + goto error; + } + int_full_removal = full_removal; + if (flux_respond_pack (h, msg, "{s:i}", + "full-removal", int_full_removal) < 0) + flux_log_error (h, "%s", __FUNCTION__); + + if (full_removal) + ctx->allocations.erase (jobid_it); + + return; + +error: + if (flux_respond_error (h, msg, errno, NULL) < 0) + flux_log_error (h, "%s: flux_respond_error", __FUNCTION__); +} + static void info_request_cb (flux_t *h, flux_msg_handler_t *w, const flux_msg_t *msg, void *arg) { From e4d6d933febfdbb5d8d68dcc01a9c24e81eef71c Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Mon, 1 Jul 2024 00:17:51 -0700 Subject: [PATCH 18/24] qmanager: add partial cancel support in callback Problem: the callback that handles the `.free` RPC does not unpack the R string payload. Add the capability to unpack the R string and call the new partial remove function. 
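
For reference, the free payload now carries both the id and an R object. The
standalone sketch below shows only the unpack/serialize step this change
assumes (it is not the callback itself); the values are illustrative, with R
shaped like the rv1exec fixtures added earlier in this series.

    #include <jansson.h>
    #include <cstdio>
    #include <cstdlib>

    int main ()
    {
        const char *msg =
            "{\"id\": 1234, \"R\": {\"version\": 1, \"execution\": "
            "{\"R_lite\": [{\"rank\": \"1\", \"children\": "
            "{\"core\": \"0-15\", \"gpu\": \"0-3\"}}], "
            "\"nodelist\": [\"node1\"], \"starttime\": 0, "
            "\"expiration\": 1000000}}}";
        json_error_t err;
        json_t *root = json_loads (msg, 0, &err);
        json_int_t id = 0;
        json_t *R = NULL;
        // Same keys the callback unpacks: "id" (integer) and "R" (object).
        if (!root || json_unpack (root, "{s:I s:o}", "id", &id, "R", &R) < 0)
            return 1;
        // Serialize R back to a compact string before handing it to the
        // queue's partial remove path, as the callback does.
        char *Rstr = json_dumps (R, JSON_COMPACT);
        std::printf ("id=%lld R=%s\n", (long long)id, Rstr);
        std::free (Rstr);
        json_decref (root);
        return 0;
    }
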
--- qmanager/modules/qmanager_callbacks.cpp | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/qmanager/modules/qmanager_callbacks.cpp b/qmanager/modules/qmanager_callbacks.cpp index 3afb5f6c4..873310ba1 100644 --- a/qmanager/modules/qmanager_callbacks.cpp +++ b/qmanager/modules/qmanager_callbacks.cpp @@ -292,29 +292,42 @@ void qmanager_cb_t::jobmanager_free_cb (flux_t *h, const flux_msg_t *msg, const char *R, void *arg) { flux_jobid_t id; + json_t *Res; + const char *Rstr = NULL; qmanager_cb_ctx_t *ctx = nullptr; ctx = static_cast (arg); std::shared_ptr queue; std::string queue_name; - if (flux_request_unpack (msg, NULL, "{s:I}", "id", &id) < 0) { + if (flux_request_unpack (msg, NULL, "{s:I s:O}", + "id", &id, "R", &Res) < 0) { flux_log_error (h, "%s: flux_request_unpack", __FUNCTION__); return; } + if (!(Rstr = json_dumps (Res, JSON_COMPACT))) { + errno = ENOMEM; + flux_log (h, LOG_ERR, "%s: json_dumps ", __FUNCTION__); + goto done; + } if (ctx->find_queue (id, queue_name, queue) < 0) { flux_log_error (h, "%s: can't find queue for job (id=%jd)", __FUNCTION__, static_cast (id)); - return; + goto done; } - if ((queue->remove (id)) < 0) { + if ( (queue->remove (static_cast (h), id, Rstr)) < 0) { flux_log_error (h, "%s: remove (queue=%s id=%jd)", __FUNCTION__, - queue_name.c_str (), static_cast (id)); - return; + queue_name.c_str (), static_cast (id)); + goto done; } if (schedutil_free_respond (ctx->schedutil, msg) < 0) { flux_log_error (h, "%s: schedutil_free_respond", __FUNCTION__); - return; + goto done; } + +done: + json_decref (Res); + free ((void *)Rstr); + return; } void qmanager_cb_t::jobmanager_cancel_cb (flux_t *h, const flux_msg_t *msg, From 665512cf6796b09e8c3bb63f4b7f5f959e7fbb73 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Mon, 1 Jul 2024 00:20:18 -0700 Subject: [PATCH 19/24] qmanager policies: add partial cancel in base, derived policies Problem: the qmanager base policy and derived-classes do not have partial cancellation functionality. Add a virtual remove function overload in base, and overrides in FCFS and backfill policies. In each case the policy needs to call the REAPI module partial cancel function, which means it can't reside in the base class. Consolidate logic in the `remove` function to call partial cancel and check if it fully removed the job's resources. If so, remove the job from the allocated, running maps and set the state to be `COMPLETE`. Do not enter the job into the completed map, because that will get popped and cancelled again in `cancel_completed_jobs`. Note that the sched loop needs to be cancelled and blocked jobs need to be reconsidered. Finally, resume the sched loop to continue scheduling jobs. 
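
The shape of that split (an untemplated base that only declares a cancel hook,
and templated derived policies that forward to the REAPI binding) is sketched
roughly below. The names are illustrative stand-ins, not the real queue_policy
classes.

    #include <cstdint>

    // Stand-in for a REAPI binding type a policy template is instantiated with.
    struct fake_reapi {
        static int cancel (void *h, uint64_t id, const char *R,
                           bool noent_ok, bool &full_removal)
        {
            full_removal = true;  // pretend R covered the whole allocation
            return 0;
        }
    };

    // Untemplated base: it cannot name reapi_type, so partial cancel is a
    // virtual hook while the bookkeeping stays in remove ().
    struct policy_base {
        virtual ~policy_base () = default;
        virtual int cancel (void *h, uint64_t id, const char *R,
                            bool noent_ok, bool &full_removal) = 0;
        int remove (void *h, uint64_t id, const char *R)
        {
            bool full_removal = false;
            if (cancel (h, id, R, true, full_removal) != 0)
                return -1;
            if (full_removal) {
                // drop the job from the running/alloced maps and mark it
                // COMPLETE without pushing it onto a completed queue
            }
            // restart the sched loop and reconsider blocked jobs here
            return 0;
        }
    };

    // Derived policy: the only layer that knows the concrete REAPI type.
    template<class reapi_type>
    struct policy_fcfs : policy_base {
        int cancel (void *h, uint64_t id, const char *R,
                    bool noent_ok, bool &full_removal) override
        {
            return reapi_type::cancel (h, id, R, noent_ok, full_removal);
        }
    };

    int main ()
    {
        policy_fcfs<fake_reapi> q;
        return q.remove (nullptr, 1234, "{}");
    }
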
--- qmanager/modules/qmanager.cpp | 3 - qmanager/policies/base/queue_policy_base.hpp | 142 ++++++++---------- qmanager/policies/queue_policy_bf_base.hpp | 3 +- .../policies/queue_policy_bf_base_impl.hpp | 24 +-- qmanager/policies/queue_policy_fcfs.hpp | 3 +- qmanager/policies/queue_policy_fcfs_impl.hpp | 21 +-- 6 files changed, 83 insertions(+), 113 deletions(-) diff --git a/qmanager/modules/qmanager.cpp b/qmanager/modules/qmanager.cpp index 20c12281e..040174921 100644 --- a/qmanager/modules/qmanager.cpp +++ b/qmanager/modules/qmanager.cpp @@ -580,9 +580,6 @@ static void qmanager_destroy (std::shared_ptr &ctx) while ( (job = ctx->queues.at (kv.first)->pending_pop ()) != nullptr) flux_respond_error (ctx->h, job->msg, ENOSYS, "unloading"); - while ( (job = ctx->queues.at (kv.first)->complete_pop ()) - != nullptr) - flux_respond_error (ctx->h, job->msg, ENOSYS, "unloading"); } schedutil_destroy (ctx->schedutil); flux_watcher_destroy (ctx->prep); diff --git a/qmanager/policies/base/queue_policy_base.hpp b/qmanager/policies/base/queue_policy_base.hpp index 1ffd385b4..76ec7442a 100644 --- a/qmanager/policies/base/queue_policy_base.hpp +++ b/qmanager/policies/base/queue_policy_base.hpp @@ -492,49 +492,80 @@ class queue_policy_base_t : public resource_model::queue_adapter_base_t * state. This queue becomes "schedulable" if pending job * queue is not empty: i.e., is_schedulable() returns true; * + * \param h Opaque handle. How it is used is an implementation + * detail. However, when it is used within a Flux's + * service module such as qmanager, it is expected + * to be a pointer to a flux_t object. * \param id jobid of flux_jobid_t type. + * \param R Resource set for partial cancel * \return 0 on success; -1 on error. * ENOENT: unknown id. */ - int remove (flux_jobid_t id) + int remove (void *h, flux_jobid_t id, const char *R) { - int rc = -1; - std::shared_ptr job = nullptr; + int rc = -1; + bool full_removal = false; - if (m_jobs.find (id) == m_jobs.end ()) { - errno = ENOENT; - goto out; - } + auto job_it = m_jobs.find (id); + if (job_it == m_jobs.end ()) { + errno = ENOENT; + goto out; + } - job = m_jobs[id]; - switch (job->state) { - case job_state_kind_t::PENDING: - this->remove_pending(job.get ()); - break; - case job_state_kind_t::ALLOC_RUNNING: - m_alloced.erase (job->t_stamps.running_ts); - // deliberately fall through - case job_state_kind_t::RUNNING: - m_running.erase (job->t_stamps.running_ts); - job->t_stamps.complete_ts = m_cq_cnt++; - job->state = job_state_kind_t::COMPLETE; - m_complete.insert (std::pair (job->t_stamps.complete_ts, - job->id)); - set_schedulability (true); - break; - default: + switch (job_it->second->state) { + case job_state_kind_t::PENDING: + this->remove_pending(job_it->second.get ()); + break; + case job_state_kind_t::ALLOC_RUNNING: + // deliberately fall through + case job_state_kind_t::RUNNING: + if ( (rc = cancel (h, job_it->second->id, R, true, full_removal) != 0)) break; + if (full_removal) { + m_alloced.erase (job_it->second->t_stamps.running_ts); + m_running.erase (job_it->second->t_stamps.running_ts); + job_it->second->t_stamps.complete_ts = m_cq_cnt++; + job_it->second->state = job_state_kind_t::COMPLETE; + m_jobs.erase (job_it); } - // with a job finishing or being canceled, restart the sched loop - cancel_sched_loop (); - // blocked jobs must be reconsidered after a job completes - // this covers cases where jobs that couldn't run because of an - // existing job's reservation can when it completes early - reconsider_blocked_jobs (); - rc = 0; - 
out: - return rc; + set_schedulability (true); + break; + default: + break; + } + cancel_sched_loop (); + // blocked jobs must be reconsidered after a job completes + // this covers cases where jobs that couldn't run because of an + // existing job's reservation can when it completes early + reconsider_blocked_jobs (); + + rc = 0; +out: + return rc; + } + + /*! Remove a job whose jobid is id from any internal queues + * (e.g., pending queue, running queue, and alloced queue.) + * If succeeds, it changes the pending queue or resource + * state. This queue becomes "schedulable" if pending job + * queue is not empty: i.e., is_schedulable() returns true; + * + * \param h Opaque handle. How it is used is an implementation + * detail. However, when it is used within a Flux's + * service module such as qmanager, it is expected + * to be a pointer to a flux_t object. + * \param id jobid of flux_jobid_t type. + * \param R Resource set for partial cancel + * \param noent_ok don't return an error on nonexistent jobid + * \param full_removal bool indictating whether the job is fully canceled + * \return 0 on success; -1 on error. + * ENOENT: unknown id. + */ + virtual int cancel (void *h, flux_jobid_t id, const char *R, bool noent_ok, + bool &full_removal) + { + full_removal = true; + return 0; } /*! Return true if this queue has become schedulable since @@ -649,26 +680,6 @@ class queue_policy_base_t : public resource_model::queue_adapter_base_t return job; } - /*! Pop the first job from the internal completed job queue. - * The popped is completely graduated from the queue policy layer. - * \return a shared pointer pointing to a job_t object - * on success; nullptr when the queue is empty. - */ - std::shared_ptr complete_pop () { - std::shared_ptr job; - flux_jobid_t id; - if (m_complete.empty ()) - return nullptr; - id = m_complete.begin ()->second; - if (m_jobs.find (id) == m_jobs.end ()) - return nullptr; - job = m_jobs[id]; - m_complete.erase (job->t_stamps.complete_ts); - m_jobs.erase (id); - return job; - } - - /*! Pop the first job from the alloced job queue. The popped * job still remains in the queue policy layer (i.e., in the * internal running job queue). 
@@ -939,28 +950,6 @@ class queue_policy_base_t : public resource_model::queue_adapter_base_t return 0; } - std::map::iterator - to_complete (std::map::iterator running_iter) - { - flux_jobid_t id = running_iter->second; - if (m_jobs.find (id) == m_jobs.end ()) { - errno = EINVAL; - return running_iter; - } - - std::shared_ptr job = m_jobs[id]; - job->state = job_state_kind_t::COMPLETE; - job->t_stamps.complete_ts = m_cq_cnt++; - auto res = m_complete.insert (std::pair( - job->t_stamps.complete_ts, job->id)); - if (!res.second) { - errno = ENOMEM; - return running_iter; - } - m_alloced.erase (job->t_stamps.running_ts); - return m_running.erase (running_iter); - } - job_map_iter to_rejected (job_map_iter pending_iter, const std::string ¬e) { @@ -1008,7 +997,6 @@ class queue_policy_base_t : public resource_model::queue_adapter_base_t unsigned int>> m_pending_reprio_provisional; std::map m_running; std::map m_alloced; - std::map m_complete; std::map m_rejected; std::map m_canceled; std::map> m_jobs; diff --git a/qmanager/policies/queue_policy_bf_base.hpp b/qmanager/policies/queue_policy_bf_base.hpp index 2ab955a69..01cb11227 100644 --- a/qmanager/policies/queue_policy_bf_base.hpp +++ b/qmanager/policies/queue_policy_bf_base.hpp @@ -31,6 +31,8 @@ class queue_policy_bf_base_t : public queue_policy_base_t virtual int handle_match_success (flux_jobid_t jobid, const char *status, const char *R, int64_t at, double ov); virtual int handle_match_failure (flux_jobid_t jobid, int errcode); + int cancel (void *h, flux_jobid_t id, const char *R, bool noent_ok, + bool &full_removal) override; protected: unsigned int m_reservation_depth; @@ -38,7 +40,6 @@ class queue_policy_bf_base_t : public queue_policy_base_t private: int next_match_iter (); - int cancel_completed_jobs (void *h); int cancel_reserved_jobs (void *h); int allocate_orelse_reserve_jobs (void *h); std::map m_reserved; diff --git a/qmanager/policies/queue_policy_bf_base_impl.hpp b/qmanager/policies/queue_policy_bf_base_impl.hpp index 8748b3bd4..f1c712f45 100644 --- a/qmanager/policies/queue_policy_bf_base_impl.hpp +++ b/qmanager/policies/queue_policy_bf_base_impl.hpp @@ -24,20 +24,6 @@ namespace detail { // Private Methods of Queue Policy Backfill Base //////////////////////////////////////////////////////////////////////////////// -template -int queue_policy_bf_base_t::cancel_completed_jobs (void *h) -{ - int rc = 0; - std::shared_ptr job; - - // Pop newly completed jobs (e.g., per a free request from - // job-manager as received by qmanager) to remove them from the - // resource infrastructure. 
- while ((job = complete_pop ()) != nullptr) - rc += reapi_type::cancel (h, job->id, true); - return rc; -} - template int queue_policy_bf_base_t::cancel_reserved_jobs (void *h) { @@ -116,6 +102,13 @@ int queue_policy_bf_base_t::allocate_orelse_reserve_jobs (void *h) return 0; } +template +int queue_policy_bf_base_t::cancel (void *h, flux_jobid_t id, + const char *R, bool noent_ok, + bool &full_removal) +{ + return reapi_type::cancel (h, id, R, noent_ok, full_removal); +} //////////////////////////////////////////////////////////////////////////////// // Public API of Queue Policy Backfill Base @@ -227,9 +220,6 @@ int queue_policy_bf_base_t::run_sched_loop (void *h, bool use_alloce int rc = 0; if (!is_sched_loop_active ()) { set_schedulability (false); - rc = cancel_completed_jobs (h); - if (rc != 0) - return rc; rc = cancel_reserved_jobs (h); if (rc != 0) return rc; diff --git a/qmanager/policies/queue_policy_fcfs.hpp b/qmanager/policies/queue_policy_fcfs.hpp index d4f5402cb..744a03794 100644 --- a/qmanager/policies/queue_policy_fcfs.hpp +++ b/qmanager/policies/queue_policy_fcfs.hpp @@ -30,9 +30,10 @@ class queue_policy_fcfs_t : public queue_policy_base_t virtual int handle_match_success (flux_jobid_t jobid, const char *status, const char *R, int64_t at, double ov); virtual int handle_match_failure (flux_jobid_t jobid, int errcode); + int cancel (void *h, flux_jobid_t id, const char *R, bool noent_ok, + bool &full_removal) override; private: - int cancel_completed_jobs (void *h); int pack_jobs (json_t *jobs); int allocate_jobs (void *h, bool use_alloced_queue); bool m_queue_depth_limit = false; diff --git a/qmanager/policies/queue_policy_fcfs_impl.hpp b/qmanager/policies/queue_policy_fcfs_impl.hpp index b519f46b7..1181e0040 100644 --- a/qmanager/policies/queue_policy_fcfs_impl.hpp +++ b/qmanager/policies/queue_policy_fcfs_impl.hpp @@ -25,19 +25,6 @@ namespace detail { // Private Methods of Queue Policy FCFS //////////////////////////////////////////////////////////////////////////////// -template -int queue_policy_fcfs_t::cancel_completed_jobs (void *h) -{ - int rc = 0; - std::shared_ptr job; - - // Pop newly completed jobs (e.g., per a free request from job-manager - // as received by qmanager) to remove them from the resource infrastructure. - while ((job = complete_pop ()) != nullptr) - rc += reapi_type::cancel (h, job->id, true); - return rc; -} - template int queue_policy_fcfs_t::pack_jobs (json_t *jobs) { @@ -157,6 +144,13 @@ int queue_policy_fcfs_t::handle_match_failure (flux_jobid_t jobid, i return 0; } +template +int queue_policy_fcfs_t::cancel (void *h, flux_jobid_t id, + const char *R, bool noent_ok, + bool &full_removal) +{ + return reapi_type::cancel (h, id, R, noent_ok, full_removal); +} //////////////////////////////////////////////////////////////////////////////// @@ -183,7 +177,6 @@ int queue_policy_fcfs_t::run_sched_loop (void *h, return 1; int rc = 0; set_schedulability (false); - rc = cancel_completed_jobs (h); rc += allocate_jobs (h, use_alloced_queue); return rc; } From 4947dffda09d5017897eb26402fff92f45557373 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Wed, 3 Jul 2024 01:40:28 -0700 Subject: [PATCH 20/24] planner_multi: remove extraneous string constructions Problem: std::strings are currently constructed from char * input parameters and used to find resources and sub-planners, creating unnecessary overhead. Remove the extraneous constructions. 
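
The container involved is a Boost multi_index, but the general idea is the
same as heterogeneous lookup with a transparent comparator: the lookup key
stays a const char * and no temporary std::string is built per call. The plain
std::map sketch below only illustrates that idea and is not the planner code.

    #include <cstdio>
    #include <map>
    #include <string>

    struct planner_stub { int placeholder; };

    int main ()
    {
        // std::less<> is transparent, so find () accepts a const char *
        // directly and compares it against the std::string keys without
        // materializing std::string (type) for every lookup.
        std::map<std::string, planner_stub *, std::less<>> planners;
        planner_stub core_planner{0};
        planners.emplace ("core", &core_planner);

        const char *type = "core";
        auto it = planners.find (type);  // no temporary std::string
        std::printf ("%s\n", it != planners.end () ? "found" : "missing");
        return 0;
    }
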
--- resource/planner/c++/planner_multi.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/resource/planner/c++/planner_multi.cpp b/resource/planner/c++/planner_multi.cpp index 407378071..2f051c83f 100644 --- a/resource/planner/c++/planner_multi.cpp +++ b/resource/planner/c++/planner_multi.cpp @@ -231,13 +231,12 @@ planner_t *planner_multi::get_planner_at (size_t i) const planner_t *planner_multi::get_planner_at (const char *type) const { auto &by_res = m_types_totals_planners.get (); - return by_res.find (std::string (type))->planner; + return by_res.find (type)->planner; } void planner_multi::update_planner_index (const char *type, size_t i) { - std::string rtype = std::string (type); - auto by_res = m_types_totals_planners.get ().find (rtype); + auto by_res = m_types_totals_planners.get ().find (type); auto new_idx = m_types_totals_planners.begin () + i; auto curr_idx = m_types_totals_planners.get ().iterator_to (*by_res); // noop if new_idx == curr_idx @@ -253,7 +252,7 @@ int planner_multi::update_planner_total (uint64_t total, size_t i) bool planner_multi::planner_at (const char *type) const { auto &by_res = m_types_totals_planners.get (); - auto result = by_res.find (std::string (type)); + auto result = by_res.find (type); if (result == by_res.end ()) return false; else @@ -273,7 +272,7 @@ int64_t planner_multi::get_resource_total_at (size_t i) const int64_t planner_multi::get_resource_total_at (const char *type) const { auto &by_res = m_types_totals_planners.get (); - auto result = by_res.find (std::string (type)); + auto result = by_res.find (type); if (result == by_res.end ()) return -1; else From 7214075443dfd6b4168f3a9297f1211dbf179deb Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Wed, 3 Jul 2024 19:49:13 -0700 Subject: [PATCH 21/24] cmd/flux-ion-resource: add flux-ion partial cancel functionality Problem: flux ion-resource does not support partial cancellation. Add support for sending partial cancel RPCs. 
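Because the new subcommand is a thin wrapper around a single RPC, the same operation can also be scripted directly against a Flux handle. A hedged sketch follows: the payload shape mirrors rpc_partial_cancel() in the diff, while the jobid value and the R file name are placeholders.

    # Sketch: issue the partial-cancel RPC without the CLI wrapper.
    # The jobid (1234) and "rank1_cancel.json" are illustrative values.
    import json
    import flux

    handle = flux.Flux()
    with open("rank1_cancel.json", "r") as stream:
        rv1exec = json.dumps(json.load(stream))

    payload = {"jobid": 1234, "R": rv1exec}
    handle.rpc("sched-fluxion-resource.partial-cancel", payload).get()
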
--- src/cmd/flux-ion-resource.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/cmd/flux-ion-resource.py b/src/cmd/flux-ion-resource.py index cbe118573..8fa71ef15 100755 --- a/src/cmd/flux-ion-resource.py +++ b/src/cmd/flux-ion-resource.py @@ -78,6 +78,10 @@ def rpc_cancel(self, jobid): payload = {"jobid": jobid} return self.handle.rpc("sched-fluxion-resource.cancel", payload).get() + def rpc_partial_cancel(self, jobid, rv1exec): + payload = {"jobid": jobid, "R": rv1exec} + return self.handle.rpc("sched-fluxion-resource.partial-cancel", payload).get() + def rpc_set_property(self, sp_resource_path, sp_keyval): payload = {"sp_resource_path": sp_resource_path, "sp_keyval": sp_keyval} return self.handle.rpc("sched-fluxion-resource.set_property", payload).get() @@ -199,6 +203,18 @@ def cancel_action(args): rmod.rpc_cancel(jobid) +def partial_cancel_action(args): + """ + Action for partial cancel sub-command + """ + + with open(args.rv1exec, "r") as stream: + rv1exec = json.dumps(json.load(stream)) + rmod = ResourceModuleInterface() + jobid = args.jobid + rmod.rpc_partial_cancel(jobid, rv1exec) + + def info_action(args): """ Action for info sub-command @@ -533,6 +549,7 @@ def mkparser(name, help_desc): parser_s = mkparser("stats", "Print overall performance statistics.") parser_sc = mkparser("stats-cancel", "Clear overall performance statistics.") parser_c = mkparser("cancel", "Cancel an allocated or reserved job.") + parser_pc = mkparser("partial-cancel", "Partially cancel an allocated job.") parse_find(mkparser("find", "Find resources matching with a criteria.")) parser_st = mkparser("status", "Display resource status.") parse_set_status(mkparser("set-status", "Set up/down status of a resource vertex.")) @@ -561,6 +578,13 @@ def mkparser(name, help_desc): parser_c.add_argument("jobid", metavar="Jobid", type=JobID, help="Jobid") parser_c.set_defaults(func=cancel_action) + # + # Positional argument for partial cancel sub-command + # + parser_pc.add_argument("jobid", metavar="Jobid", type=JobID, help="Jobid") + parser_pc.add_argument("rv1exec", metavar="rv1exec", type=str, help="RV1exec") + parser_pc.set_defaults(func=partial_cancel_action) + # # Positional argument for find sub-command # From 5d478e4578e332e8485f089b5904d7c4d5b34426 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Wed, 3 Jul 2024 19:49:49 -0700 Subject: [PATCH 22/24] testsuite: add partial cancel tests for flux-ion Problem: flux ion-resource does not have testsuite tests. Add them. --- t/t4003-cancel-info.t | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/t/t4003-cancel-info.t b/t/t4003-cancel-info.t index ae7a384f7..f287c7bcc 100755 --- a/t/t4003-cancel-info.t +++ b/t/t4003-cancel-info.t @@ -9,7 +9,11 @@ Ensure that the cancel and info handlers within the resource module works . 
`dirname $0`/sharness.sh grug="${SHARNESS_TEST_SRCDIR}/data/resource/grugs/tiny.graphml" +rv1="${SHARNESS_TEST_SRCDIR}/data/resource/rv1exec/tiny_rv1exec.json" jobspec="${SHARNESS_TEST_SRCDIR}/data/resource/jobspecs/basics/test001.yaml" +jobspec1="${SHARNESS_TEST_SRCDIR}/data/resource/jobspecs/cancel/test018.yaml" +jobspec2="${SHARNESS_TEST_SRCDIR}/data/resource/jobspecs/cancel/test019.yaml" +rv1cancel="${SHARNESS_TEST_SRCDIR}/data/resource/rv1exec/cancel/rank1_cancel.json" # # test_under_flux is under sharness.d/ @@ -71,4 +75,20 @@ test_expect_success 'removing resource works' ' remove_resource ' +test_expect_success 'loading resource module with a tiny machine config works' ' + load_resource \ +load-file=${rv1} prune-filters=ALL:core \ +load-format=rv1exec subsystems=containment policy=low +' + +test_expect_success 'resource-cancel works' ' + flux ion-resource match allocate ${jobspec1} && + flux ion-resource partial-cancel 0 ${rv1cancel} && + flux ion-resource match allocate ${jobspec2} +' + +test_expect_success 'removing resource works' ' + remove_resource +' + test_done From 6ea88772b41571092b4068a89126adda22c70fbe Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Fri, 5 Jul 2024 20:12:31 -0700 Subject: [PATCH 23/24] resource: avoid copy constructor in for loops Problem: many range-based loops in Fluxion result in calling an object copy constructor. Avoid the copy constructor cost and add const type qualifier where appropriate. --- resource/planner/c++/planner_multi.cpp | 12 +++--- resource/readers/resource_reader_jgf.cpp | 45 +++++++++++--------- resource/readers/resource_reader_rv1exec.cpp | 4 +- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/resource/planner/c++/planner_multi.cpp b/resource/planner/c++/planner_multi.cpp index 2f051c83f..5545c89a8 100644 --- a/resource/planner/c++/planner_multi.cpp +++ b/resource/planner/c++/planner_multi.cpp @@ -57,7 +57,7 @@ planner_multi::planner_multi (int64_t base_time, uint64_t duration, planner_multi::planner_multi (const planner_multi &o) { - for (auto iter : o.m_types_totals_planners) { + for (auto &iter : o.m_types_totals_planners) { planner_t *np = nullptr; if (iter.planner) { try { @@ -92,7 +92,7 @@ planner_multi &planner_multi::operator= (const planner_multi &o) // Erase *this so the vectors are empty erase (); - for (auto iter : o.m_types_totals_planners) { + for (const auto &iter : o.m_types_totals_planners) { planner_t *np = nullptr; if (iter.planner) { try { @@ -141,10 +141,10 @@ bool planner_multi::operator== (const planner_multi &o) const if (m_types_totals_planners.size () != o.m_types_totals_planners.size ()) return false; - auto &o_by_type = o.m_types_totals_planners.get (); - auto &by_type = m_types_totals_planners.get (); - for (auto data : by_type) { - auto o_data = o_by_type.find (data.resource_type); + const auto &o_by_type = o.m_types_totals_planners.get (); + const auto &by_type = m_types_totals_planners.get (); + for (const auto &data : by_type) { + const auto o_data = o_by_type.find (data.resource_type); if (o_data == o_by_type.end ()) return false; if (data.resource_type != o_data->resource_type) diff --git a/resource/readers/resource_reader_jgf.cpp b/resource/readers/resource_reader_jgf.cpp index c66fef151..685967038 100644 --- a/resource/readers/resource_reader_jgf.cpp +++ b/resource/readers/resource_reader_jgf.cpp @@ -197,19 +197,19 @@ std::string diff (const resource_pool_t &r, const fetch_helper_t &f) sstream << " name=(" << r.name << ", " << f.name << ")"; if (r.properties != f.properties) { 
sstream << " properties=("; - for (auto &kv : r.properties) + for (const auto &kv : r.properties) sstream << kv.first << "=" << kv.second << " "; sstream << ", "; - for (auto &kv : f.properties) + for (const auto &kv : f.properties) sstream << kv.first << "=" << kv.second << " "; sstream << ")"; } if (r.paths != f.paths) { sstream << " paths=("; - for (auto &kv : r.paths) + for (const auto &kv : r.paths) sstream << kv.first << "=" << kv.second << " "; sstream << ", "; - for (auto &kv : f.paths) + for (const auto &kv : f.paths) sstream << kv.first << "=" << kv.second << " "; sstream << ")"; } @@ -455,7 +455,7 @@ vtx_t resource_reader_jgf_t::create_vtx (resource_graph_t &g, g[v].paths = fetcher.paths; g[v].schedule.plans = plans; g[v].idata.x_checker = x_checker; - for (auto kv : g[v].paths) + for (const auto &kv : g[v].paths) g[v].idata.member_of[kv.first] = "*"; done: @@ -468,10 +468,10 @@ vtx_t resource_reader_jgf_t::vtx_in_graph (const resource_graph_t &g, std::string> &paths, int rank) { - for (auto const &paths_it : paths) { + for (const auto &paths_it : paths) { auto iter = m.by_path.find (paths_it.second); if (iter != m.by_path.end ()) { - for (auto &v : iter->second) { + for (const auto &v : iter->second) { if (g[v].rank == rank) { return v; } @@ -492,7 +492,7 @@ int resource_reader_jgf_t::check_root (vtx_t v, resource_graph_t &g, { int rc = -1; std::pair::iterator, bool> ptr; - for (auto kv : g[v].paths) { + for (const auto &kv : g[v].paths) { if (is_root (kv.second)) { ptr = is_roots.emplace (kv.first, true); if (!ptr.second) @@ -512,7 +512,7 @@ int resource_reader_jgf_t::add_graph_metadata (vtx_t v, int rc = -1; std::pair::iterator, bool> ptr; - for (auto kv : g[v].paths) { + for (const auto &kv : g[v].paths) { if (is_root (kv.second)) { ptr = m.roots.emplace (kv.first, v); if (!ptr.second) { @@ -540,27 +540,30 @@ int resource_reader_jgf_t::remove_graph_metadata (vtx_t v, resource_graph_metadata_t &m) { int rc = -1; - for (auto kv : g[v].paths) { + for (auto &kv : g[v].paths) { m.by_path.erase (kv.second); } m.by_outedges.erase (v); - for (auto it = m.by_type[g[v].type].begin (); it != m.by_type[g[v].type].end (); ++it) { + for (auto it = m.by_type[g[v].type].begin (); + it != m.by_type[g[v].type].end (); ++it) { if (*it == v) { m.by_type[g[v].type].erase (it); break; } } - for (auto it = m.by_name[g[v].name].begin (); it != m.by_name[g[v].name].end (); ++it) { + for (auto it = m.by_name[g[v].name].begin (); + it != m.by_name[g[v].name].end (); ++it) { if (*it == v) { m.by_name[g[v].name].erase (it); break; } } - for (auto it = m.by_rank[g[v].rank].begin (); it != m.by_rank[g[v].rank].end (); ++it) { + for (auto it = m.by_rank[g[v].rank].begin (); + it != m.by_rank[g[v].rank].end (); ++it) { if (*it == v) { m.by_rank[g[v].rank].erase (it); break; @@ -657,7 +660,7 @@ int resource_reader_jgf_t::exist (resource_graph_t &g, { try { auto &vect = m.by_path.at (path); - for (auto &u : vect) { + for (const auto &u : vect) { if (g[u].rank == rank) { v = u; return 0; @@ -694,7 +697,7 @@ int resource_reader_jgf_t::find_vtx (resource_graph_t &g, goto done; } - for (auto &kv : fetcher.paths) { + for (const auto &kv : fetcher.paths) { if (exist (g, m, kv.second, fetcher.rank, fetcher.vertex_id, u) < 0) goto done; if (v == nullvtx) { @@ -884,7 +887,7 @@ int resource_reader_jgf_t::undo_vertices (resource_graph_t &g, planner_t *plans = NULL; vtx_t v = boost::graph_traits::null_vertex (); - for (auto &kv : vmap) { + for (const auto &kv : vmap) { if (kv.second.exclusive != 1) continue; try { @@ 
-1118,7 +1121,7 @@ int resource_reader_jgf_t::update_src_edge (resource_graph_t &g, if (vmap[source].is_roots.empty ()) return 0; - for (auto &kv : vmap[source].is_roots) + for (const auto &kv : vmap[source].is_roots) m.v_rt_edges[kv.first].set_for_trav_update (vmap[source].needs, vmap[source].exclusive, token); @@ -1204,7 +1207,7 @@ int resource_reader_jgf_t::get_subgraph_vertices (resource_graph_t &g, for (; ei != ei_end; ++ei) { next_vtx = boost::target (*ei, g); - for (auto const &paths_it : g[next_vtx].paths) { + for (const auto &paths_it : g[next_vtx].paths) { // check that we don't recurse on parent edges if (paths_it.second.find (g[vtx].name) != std::string::npos && paths_it.second.find (g[vtx].name) < paths_it.second.find (g[next_vtx].name)) { @@ -1230,7 +1233,7 @@ int resource_reader_jgf_t::get_parent_vtx (resource_graph_t &g, for (; ei != ei_end; ++ei) { next_vtx = boost::target (*ei, g); - for (auto const &paths_it : g[vtx].paths) { + for (const auto &paths_it : g[vtx].paths) { // check that the parent's name exists in the child's path before the child's name if (paths_it.second.find (g[next_vtx].name) != std::string::npos && paths_it.second.find (g[vtx].name) > paths_it.second.find (g[next_vtx].name)) { @@ -1356,7 +1359,7 @@ int resource_reader_jgf_t::remove_subgraph (resource_graph_t &g, return -1; } - for (auto &v : iter->second) { + for (const auto &v : iter->second) { subgraph_root_vtx = v; } @@ -1370,7 +1373,7 @@ int resource_reader_jgf_t::remove_subgraph (resource_graph_t &g, if (remove_metadata_outedges (parent_vtx, subgraph_root_vtx, g, m) != 0) return -1; - for (auto & vtx : vtx_list) { + for (auto &vtx : vtx_list) { // clear vertex edges but don't delete vertex boost::clear_vertex (vtx, g); remove_graph_metadata (vtx, g, m); diff --git a/resource/readers/resource_reader_rv1exec.cpp b/resource/readers/resource_reader_rv1exec.cpp index 3128ccfb3..d0d38fdd1 100644 --- a/resource/readers/resource_reader_rv1exec.cpp +++ b/resource/readers/resource_reader_rv1exec.cpp @@ -250,7 +250,7 @@ vtx_t resource_reader_rv1exec_t::find_vertex (resource_graph_t &g, if (vtx_iter == m.by_path.end ()) return null_vtx; // Found in by_path - for (vtx_t v : vtx_iter->second) { + for (const vtx_t &v : vtx_iter->second) { if (g[v].rank == rank && g[v].id == id && g[v].size == size @@ -334,7 +334,7 @@ int resource_reader_rv1exec_t::undo_vertices (resource_graph_t &g, planner_t *plans = NULL; for (auto &[rank, vertices] : update_data.updated_vertices) { - for (vtx_t vtx : vertices) { + for (const vtx_t &vtx : vertices) { // Check plan if ( (plans = g[vtx].schedule.plans) == NULL) { errno = EINVAL; From a20ad21719c83acd7db120cea253529fc99ad0f0 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Mon, 8 Jul 2024 20:32:03 -0700 Subject: [PATCH 24/24] testsuite: update multiqueue test for partial cancel Problem: partial cancel functionality changes the order of jobid3 and jobid4 start after cancellation of jobid2 in t1009-recovery-multiqueue. Add an OR condition to wait on jobid3 or jobid4 to start upon cancelling jobid2. 
--- t/t1009-recovery-multiqueue.t | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/t1009-recovery-multiqueue.t b/t/t1009-recovery-multiqueue.t index 3a00f724e..7737555e6 100755 --- a/t/t1009-recovery-multiqueue.t +++ b/t/t1009-recovery-multiqueue.t @@ -78,7 +78,8 @@ test_expect_success 'recovery: works when both modules restart (rv1)' ' test_expect_success 'recovery: a cancel leads to a job schedule (rv1)' ' flux cancel ${jobid2} && - flux job wait-event -t 10 ${jobid4} start + (flux job wait-event -t 10 ${jobid3} start || + flux job wait-event -t 10 ${jobid4} start) ' test_expect_success 'recovery: cancel all jobs (rv1_nosched)' '