From c403aafacc84dfddd61f6dc9f000dd99fadc9508 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Fri, 22 Mar 2024 16:26:43 -0700 Subject: [PATCH 1/3] flux-resource: improve performance of ResourceSetExtra initializer Problem: The ResourceSetExtra class is supposed to be just a wrapper around a ResourceSet class that adds the convenience `propertiesx` and `queue` properties. However, using this class is slow because it reinvokes the underlying ResourceSet initializer, which ends up creating an unnecessary copy of the resource set from scratch. Make the class a wrapper by just stashing the original ResourceSet object and forwarding unknown getattrs to the wrapped object. This avoids the wasted time recreating the copied resource set. --- src/cmd/flux-resource.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/cmd/flux-resource.py b/src/cmd/flux-resource.py index 50342a2ce497..338d02ab27f3 100755 --- a/src/cmd/flux-resource.py +++ b/src/cmd/flux-resource.py @@ -418,11 +418,14 @@ class ResourceSetExtra(ResourceSet): def __init__(self, arg=None, version=1, flux_config=None): self.flux_config = flux_config if isinstance(arg, ResourceSet): - super().__init__(arg.encode(), version) + self._rset = arg if arg.state: self.state = arg.state else: - super().__init__(arg, version) + self._rset = ResourceSet(arg, version) + + def __getattr__(self, attr): + return getattr(self._rset, attr) @property def propertiesx(self): From 0cd2d14181f9e97e035a728b19ece048e4696133 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Fri, 22 Mar 2024 16:35:32 -0700 Subject: [PATCH 2/3] flux-resource: increase efficiency of resources_uniq_lines() Problem: The `flux resource list` command spends the majority of its time in resources_uniq_lines() on large clusters because it iterates over every rank in each state resource set in order to create the set of unique lines. 
Instead split the resource into smaller subsets based on all combinations of properties contained within that set. For the purposes of the current incarnation of `flux resource list` this should be the minimum number of distinct resource sets required to generate possibly unique lines of output. --- src/cmd/flux-resource.py | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/cmd/flux-resource.py b/src/cmd/flux-resource.py index 338d02ab27f3..f16f1cafe1a7 100755 --- a/src/cmd/flux-resource.py +++ b/src/cmd/flux-resource.py @@ -14,6 +14,7 @@ import logging import os.path import sys +from itertools import combinations import flux from flux.hostlist import Hostlist @@ -453,6 +454,34 @@ def queue(self): return queues +def split_by_property_combinations(rset): + """ + Split a resource set by all combinations of its properties. + This is done in hopes of splitting a resource into the minimum number + of subsets that may produce unique lines in the resource listing output. + """ + + def constraint_combinations(rset): + properties = set(json.loads(rset.get_properties()).keys()) + sets = [ + set(combination) + for i in range(1, len(properties) + 1) + for combination in combinations(properties, i) + ] + # Also include the empty set, i.e. 
resources with no properties + sets.append(set()) + + # generate RFC 31 constraint objects for each property combination + result = [] + for cset in sets: + diff = properties - cset + cset.update(["^" + x for x in diff]) + result.append({"properties": list(cset)}) + return result + + return [rset.copy_constraint(x) for x in constraint_combinations(rset)] + + def resources_uniq_lines(resources, states, formatter, config): """ Generate a set of resource sets that would produce unique lines given @@ -496,8 +525,10 @@ def resources_uniq_lines(resources, states, formatter, config): lines[key].add(rset) continue - for rank in resources[state].ranks: - rset = resources[state].copy_ranks(rank) + for rset in split_by_property_combinations(resources[state]): + if not rset.ranks: + continue + rset.state = state rset = ResourceSetExtra(rset, flux_config=config) key = fmt.format(rset) From 5693383254aa0efdf9aceb0f2af8dc9c68dbbeb1 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Sat, 23 Mar 2024 09:39:57 -0700 Subject: [PATCH 3/3] flux-resource: sort output lines for reproducible results Problem: lines.values() does not guarantee a sorting order, but this is passed to formatter.print_items() in `flux resource list`, which could lead to arbitrary output order. Sort output lines on (resource state, first rank) to create reproducible output. 
--- src/cmd/flux-resource.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cmd/flux-resource.py b/src/cmd/flux-resource.py index f16f1cafe1a7..9c82638ecb3d 100755 --- a/src/cmd/flux-resource.py +++ b/src/cmd/flux-resource.py @@ -572,6 +572,14 @@ def get_resource_list(args): return resources, config +def sort_output(args, items): + """ + Sort by args.states order, then first rank in resource set + """ + statepos = {x[1]: x[0] for x in enumerate(args.states)} + return sorted(items, key=lambda x: (statepos[x.state], x.ranks.first())) + + def list_handler(args): headings = { "state": "STATE", @@ -591,7 +599,8 @@ def list_handler(args): formatter = flux.util.OutputFormat(fmt, headings=headings) lines = resources_uniq_lines(resources, args.states, formatter, config) - formatter.print_items(lines.values(), no_header=args.no_header) + items = sort_output(args, lines.values()) + formatter.print_items(items, no_header=args.no_header) def info(args):