From 285e66a316dfd34bdf684221259809733f974239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Mon, 30 Dec 2024 15:45:14 -0500 Subject: [PATCH] #558: base of reconstructed PR post conflict resolution and integration of changes --- src/lbaf/Applications/LBAF_app.py | 3 ++ src/lbaf/Execution/lbsAlgorithmBase.py | 12 +++++ src/lbaf/Model/lbsNode.py | 75 ++++++++++++++++++++++++++ src/lbaf/Model/lbsPhase.py | 2 +- src/lbaf/Model/lbsRank.py | 19 ++++--- 5 files changed, 104 insertions(+), 7 deletions(-) create mode 100644 src/lbaf/Model/lbsNode.py diff --git a/src/lbaf/Applications/LBAF_app.py b/src/lbaf/Applications/LBAF_app.py index 6e7daffd..715584b5 100644 --- a/src/lbaf/Applications/LBAF_app.py +++ b/src/lbaf/Applications/LBAF_app.py @@ -88,6 +88,7 @@ class InternalParameters: # From data input options data_stem: Optional[str] = None + ranks_per_node : Optional[int] = 1 # From samplers input options n_ranks: Optional[int] = None @@ -150,6 +151,8 @@ def init_parameters(self, config: dict, base_dir: str): else: self.phase_ids = from_data.get("phase_ids") self.expected_ranks = from_data.get("expected_ranks") + if (rpn := from_data.get("ranks_per_node")) is not None: + self.ranks_per_node = int(rpn) # Parse sampling parameters if present from_samplers = config.get("from_samplers") diff --git a/src/lbaf/Execution/lbsAlgorithmBase.py b/src/lbaf/Execution/lbsAlgorithmBase.py index 15d4d86f..3b7100be 100644 --- a/src/lbaf/Execution/lbsAlgorithmBase.py +++ b/src/lbaf/Execution/lbsAlgorithmBase.py @@ -195,6 +195,18 @@ def _initialize(self, p_id, phases, statistics): f"across {self._rebalanced_phase.get_number_of_ranks()} ranks " f"into phase {self._rebalanced_phase.get_id()}") + # Replicate nodes on rebalanced phase + ranks_per_node = 1 + new_nodes: List[Node] = [] + phase_ranks = self._initial_phase.get_ranks() + if (nr := len(phase_ranks)) > 0 and phase_ranks[0].node is not None: + ranks_per_node = phase_ranks[0].node.get_number_of_ranks() + if ranks_per_node > 1: + n_nodes = int(nr / ranks_per_node) + new_nodes = list(map( + lambda n_id: Node(self._logger, n_id), + list(range(0, n_nodes)))) + # Initialize run statistics self._update_statistics(statistics) diff --git a/src/lbaf/Model/lbsNode.py b/src/lbaf/Model/lbsNode.py new file mode 100644 index 00000000..b6c7a070 --- /dev/null +++ b/src/lbaf/Model/lbsNode.py @@ -0,0 +1,75 @@ +# +#@HEADER +############################################################################### +# +# lbsNode.py +# DARMA/LB-analysis-framework => LB Analysis Framework +# +# Copyright 2019-2024 National Technology & Engineering Solutions of Sandia, LLC +# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +# Government retains certain rights in this software. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# Questions? Contact darma@sandia.gov +# +############################################################################### +#@HEADER +# + +import copy +import math +import functools +import operator +from logging import Logger +from typing import Optional + +class Node: + """A class representing a node to which a set of ranks are assigned.""" + + def __init__( + self, + logger: Logger, + n_id: int = -1): + + # Assign logger to instance variable + self.__logger = logger #pylint:disable=unused-private-member + + # Member variables passed by constructor + self.__index = n_id + self.__ranks = set() + + def get_max_memory_usage(self): + """Combine all memory usages for each rank to get the node memory usage.""" + return functools.reduce( + operator.add, map(lambda r: r.get_max_memory_usage(), list(self.__ranks))) + + def add_rank(self, rank): + self.__ranks.add(rank) + + def get_number_of_ranks(self) -> int: + return len(self.__ranks) diff --git a/src/lbaf/Model/lbsPhase.py b/src/lbaf/Model/lbsPhase.py index 5f6cad29..b3cea4aa 100644 --- a/src/lbaf/Model/lbsPhase.py +++ b/src/lbaf/Model/lbsPhase.py @@ -137,7 +137,7 @@ def get_ranks(self): def copy_ranks(self, phase: Self): """Copy ranks from one phase to self.""" - new_ranks = set() + new_ranks: Set[Rank] = set() for r in phase.get_ranks(): # Minimally instantiate rank and copy new_r = Rank(self.__logger) diff --git a/src/lbaf/Model/lbsRank.py b/src/lbaf/Model/lbsRank.py index 0e561892..4cf1b1ca 100644 --- a/src/lbaf/Model/lbsRank.py +++ b/src/lbaf/Model/lbsRank.py @@ -48,19 +48,21 @@ from .lbsBlock import Block from .lbsObject import Object from .lbsQOIDecorator import qoi +from .lbsNode import Node class Rank: """A class representing a rank to which objects are assigned.""" def __init__( - self, - logger: Logger, - r_id: int = -1, - migratable_objects: set = None, - sentinel_objects: set = None): + self, + logger: Logger, + r_id: int = -1, + migratable_objects: set = None, + sentinel_objects: set = None, + node: Node = None): # Assign logger to instance variable - self.__logger = logger #pylint:disable=unused-private-member + self.__logger = logger # Member variables passed by constructor self.__index = r_id @@ -82,6 +84,11 @@ def __init__( # Start with empty metadata self.__metadata = {} + # Optionally, the rank is connected to a node + self.node = node + if node is not None: + node.add_rank(self) + def copy(self, rank): """Specialized copy method.""" # Copy all flat member variables