From 7a2f45b88acd3b37b4ef11d09b9799ec5ac62726 Mon Sep 17 00:00:00 2001 From: talbpw Date: Thu, 24 Oct 2024 12:54:05 -0600 Subject: [PATCH 01/18] stash, need unit tester for Realization --- ravenframework/DataObjects/DataSet.py | 13 +- ravenframework/Optimizers/Optimizer.py | 28 ++- ravenframework/Optimizers/RavenSampled.py | 9 + ravenframework/Realizations/Realization.py | 135 +++++++++++++ .../Realizations/__init__.py | 16 +- ravenframework/Samplers/MonteCarlo.py | 9 + ravenframework/Samplers/Sampler.py | 188 ++++++++++++++---- .../Samplers/NDSamples/gold/MC/solns.csv | 31 +++ .../{VectorSamples => NDSamples}/mc.xml | 29 ++- .../{VectorSamples => NDSamples}/tests | 0 .../Samplers/NDSamples/vectorInputModel.py | 25 +++ .../Samplers/VectorSamples/gold/MC/solns.csv | 31 --- 12 files changed, 410 insertions(+), 104 deletions(-) create mode 100644 ravenframework/Realizations/Realization.py rename tests/framework/Samplers/VectorSamples/vectorInputModel.py => ravenframework/Realizations/__init__.py (63%) create mode 100644 tests/framework/Samplers/NDSamples/gold/MC/solns.csv rename tests/framework/Samplers/{VectorSamples => NDSamples}/mc.xml (70%) rename tests/framework/Samplers/{VectorSamples => NDSamples}/tests (100%) create mode 100644 tests/framework/Samplers/NDSamples/vectorInputModel.py delete mode 100644 tests/framework/Samplers/VectorSamples/gold/MC/solns.csv diff --git a/ravenframework/DataObjects/DataSet.py b/ravenframework/DataObjects/DataSet.py index 6d8d2a06ac..5989317dd8 100644 --- a/ravenframework/DataObjects/DataSet.py +++ b/ravenframework/DataObjects/DataSet.py @@ -218,7 +218,8 @@ def addRealization(self, rlz): # rlz = {'H': [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]], # 'X': [0, 1, 2], # 'Y': [0.5, 1.5], - # '_indexMap': ['Y', 'X']} + # '_indexMap': [{'H': ['Y', 'X']}] + # } # Note the order, H has shape (2, 3) so the first index is Y and the second is X. 
# A sanity check is that H.shape == tuple(var.size for var in rlz['_indexMap']) # @@ -637,8 +638,16 @@ def realization(self, index=None, matchDict=None, noMatchDict=None, tol=1e-15, u rlzs = rlz if type(rlz).__name__ == "list" else [rlz] rlzs = [self._addIndexMapToRlz(rl) for rl in rlzs] dims = self.getDimensions() + print('*'*80) + print('DEBUGG whoami:', self.name) + print('DEBUGG dims:', dims) for index, rl in enumerate(rlzs): - d = {k:{'dims':tuple(dims[k]) ,'data': v} for (k,v) in rl.items()} + d = {k:{'dims':tuple(dims[k]) ,'data': v} for (k,v) in rl.items() if k not in ['_indexMap']} + print('*'*80) + print('DEBUGG d:') + for k, v in d.items(): + print(k, v) + print('*'*80) rlz[index] = xr.Dataset.from_dict(d) if len(rlzs) > 1: # concatenate just in case there are multiple realizations diff --git a/ravenframework/Optimizers/Optimizer.py b/ravenframework/Optimizers/Optimizer.py index e731bb9dc1..8561ccac82 100644 --- a/ravenframework/Optimizers/Optimizer.py +++ b/ravenframework/Optimizers/Optimizer.py @@ -18,10 +18,6 @@ Reworked 2020-01 @author: talbpaul """ -# for future compatibility with Python 3------------------------------------------------------------ -from __future__ import division, print_function, unicode_literals, absolute_import -# End compatibility block for Python 3-------------------------------------------------------------- - # External Modules---------------------------------------------------------------------------------- import copy import abc @@ -430,10 +426,25 @@ def _initializeInitSampler(self, externalSeeding): # get the sample self._initSampler.generateInput(None, None) rlz = self._initSampler.inputInfo['SampledVars'] - # NOTE by looping over self.toBeSampled, we could potentially not error out when extra vars are sampled - for var in self.toBeSampled: - if var in rlz: - self._initialValues[n][var] = rlz[var] # TODO float or np.1darray? + for var, val in rlz.items(): + if var in self.ndVariables: + expanded = self._expandNDVariable(var, val) + for expVar, expVal in expanded.items(): + if expVar in self.toBeSampled: + self._initialValues[n][expVar] = expVal + else: + if var in self.toBeSampled: + self._initialValues[n][var] = val + # TODO this doesn't technically guarantee that each var in toBeSampled has a value. + # Can we check against this, or will it error in an intelligent way? + + #### OLD #### + # # NOTE by looping over self.toBeSampled, we could potentially not error out when extra vars are sampled + # for var in self.toBeSampled: + # # TODO is var ever not in rlz? Should this be an error out? + # if var in rlz: + # self._initialValues[n][var] = rlz[var] # TODO float or np.1darray? + #### END OLD #### def initializeTrajectory(self, traj=None): """ @@ -517,7 +528,6 @@ def denormalizeData(self, normalized): denormed[var] = self.denormalizeVariable(normalized[var], var) return denormed - def needDenormalized(self): """ Determines if the currently used algorithms should be normalizing the input space or not diff --git a/ravenframework/Optimizers/RavenSampled.py b/ravenframework/Optimizers/RavenSampled.py index 61bc4c9841..9c5d19ea5b 100644 --- a/ravenframework/Optimizers/RavenSampled.py +++ b/ravenframework/Optimizers/RavenSampled.py @@ -151,6 +151,15 @@ def handleInput(self, paramInput): if self.limit is None: self.raiseAnError(IOError, 'A is required for any RavenSampled Optimizer!') + def _checkNDVariables(self): + """ + Checks properties of ND variables for compatibility. 
RavenSampled ND variables + work as expected with the default Sampler implementation. + @ In, None + @ Out, None + """ + # nothing to check at this point, just overriding the base Sampler definition that errors out by default. + def initialize(self, externalSeeding=None, solutionExport=None): """ This function should be called every time a clean optimizer is needed. Called before takeAstep in diff --git a/ravenframework/Realizations/Realization.py b/ravenframework/Realizations/Realization.py new file mode 100644 index 0000000000..75866a0947 --- /dev/null +++ b/ravenframework/Realizations/Realization.py @@ -0,0 +1,135 @@ +# Copyright 2017 Battelle Energy Alliance, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + Realizations carry sampled information between entities in RAVEN +""" + +class Realization: + """ + A mapping container specifically for carrying data between entities in RAVEN, such + as the Sampler and Step. + See https://docs.python.org/3/reference/datamodel.html?emulating-container-types=#emulating-container-types + """ + def __init__(self): + """ + Constructor. + @ In, None + @ Out, None + """ + self._values = {} # mapping of variables to their values + self.inputInfo = {} # additional information about this realization + self.indexMap = {} # information about dimensionality of variables + self.labels = {} # custom labels for tracking, set externally + + ######## + # + # dict-like members + # + def __len__(self): + """ + Python built-in for realization length. + @ In, None + @ Out, len, number of variables in realization + """ + return len(self._values) + + def __getitem__(self, key): + """ + Python built-in for acquiring values. + @ In, key, str, variable name + @ Out, item, any, contents of realization corresponding to variable + """ + return self._values[key] + + def __setitem__(self, key, value): + """ + Python built-in for setting values. + @ In, key, str, variable name + @ In, value, any, corresponding value + @ Out, None + """ + self._values[key] = value + + def __delitem__(self, key): + """ + Python built-in for removing values. + @ In, key, str, variable name + @ Out, None + """ + # TODO also remove from inputInfo and indexMap? + del self._values[key] + + # TODO needed? Used when getitem is called and it's not present + #def __missing__(self, key): + # return self._values.__missing__(key) + + def __iter__(self): + """ + Python built-in for providing iterator for keys. 
+ @ In, None + @ Out, iter, iterable, variable names + """ + return iter(self._values) + + def __contains__(self, item): + """ + Python built-in for "in" patterns such as "x in d" + @ In, item, str, variable name + @ Out, in, bool, True if variable name in realization + """ + return item in self._values + + def update(self, *args, **kwargs): + """ + Python built-in for updating many key-value pairs at once + @ In, args, list, list arguments + @ In, kwargs, dict, dictionary arguments + @ Out, None + """ + return self._values.update(*args, **kwargs) + + def keys(self): + """ + Python built-in for acquiring variable names + @ In, None + @ Out, keys, list, list of var names + """ + return self._values.keys() + + def values(self): + """ + Python built-in for acquiring variable values + @ In, None + @ Out, keys, list, list of var values + """ + return self._values.values() + + def items(self): + """ + Python built-in for acquiring variable (name, value) pairs + @ In, None + @ Out, keys, list, list of var (name, value) tuples + """ + return self._values.items() + + def pop(self, *args): + """ + Python built-in for removing and returning entry in realization + @ In, None + @ Out, pop, any, value of corresponding key + """ + # TODO extend to other info dicts? + return self._values.pop(*args) + + diff --git a/tests/framework/Samplers/VectorSamples/vectorInputModel.py b/ravenframework/Realizations/__init__.py similarity index 63% rename from tests/framework/Samplers/VectorSamples/vectorInputModel.py rename to ravenframework/Realizations/__init__.py index c029eac922..7f7a7d8130 100644 --- a/tests/framework/Samplers/VectorSamples/vectorInputModel.py +++ b/ravenframework/Realizations/__init__.py @@ -11,12 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" + Realizations carry sampled information between entities in RAVEN +""" -import numpy as np +# These lines ensure that we do not have to do something like: +# 'from Samplers.Sampler import Sampler' outside of this submodule +from .Realization import Realization -def run(self,Input): - for key,val in Input.items(): - print(key,val) - self.scalarOut = self.scalarIn**2 - self.vectorOut = self.vectorIn**2 - self.t = np.arange(len(self.vectorIn)) +# from .Batches import BatchRealization + +#TODO do we need this? from .Factory import factory diff --git a/ravenframework/Samplers/MonteCarlo.py b/ravenframework/Samplers/MonteCarlo.py index 1862f98089..5c7e80524d 100644 --- a/ravenframework/Samplers/MonteCarlo.py +++ b/ravenframework/Samplers/MonteCarlo.py @@ -100,6 +100,15 @@ def localInputAndChecks(self, xmlNode, paramInput): else: self.raiseAnError(IOError, self, f'Monte Carlo sampler {self.name} needs the samplerInit block') + def _checkNDVariables(self): + """ + Provides an opportunity to check compatibility with and usage of N-dimensional variables. + By default, errors and provides notification to users. + @ In, None + @ Out, None + """ + # MonteCarlo supports ND variables, and doesn't need to check anything to operate as expected. + def localGenerateInput(self, model, myInput): """ Provides the next sample to take. 
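For orientation before the Sampler.py diff: the approach in this patch series is to split each N-dimensional variable into independent scalar samples using a reserved name postfix, carry them in the new Realization container added above, and reassemble them into shaped arrays (plus an indexMap entry) before the realization leaves the Sampler. The following is a minimal standalone sketch of that round trip; the expand/reform helpers and the 'ND_in'/dims names are illustrative stand-ins for the _expandNDVariable/_formNDVariables methods introduced below, not the actual implementation.

  import numpy as np
  from ravenframework.Realizations import Realization

  postfix = '__RVEC__{ID}'  # same postfix format string the Sampler defines below

  def expand(name, array):
    # flatten an ND sample into independent scalar entries, one per element
    return {name + postfix.format(ID=i): v for i, v in enumerate(np.asarray(array).flat)}

  def reform(rlz, name, shape, dims):
    # inverse operation: pop the scalars back out of the realization and reshape them
    flat = [rlz.pop(name + postfix.format(ID=i)) for i in range(int(np.prod(shape)))]
    rlz[name] = np.asarray(flat).reshape(shape)
    rlz.indexMap[name] = dims  # record which index labels which axis

  rlz = Realization()
  rlz.update(expand('ND_in', np.arange(12).reshape(3, 4)))  # adds ND_in__RVEC__0 .. ND_in__RVEC__11
  reform(rlz, 'ND_in', (3, 4), ('x', 'y'))
  assert rlz['ND_in'].shape == (3, 4) and rlz.indexMap['ND_in'] == ('x', 'y')

The Realization object itself quacks like a dict (setitem, update, pop, iteration), with sampler metadata kept separately on its inputInfo and indexMap attributes; the TestRealization unit test added later in this series exercises that interface directly.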
diff --git a/ravenframework/Samplers/Sampler.py b/ravenframework/Samplers/Sampler.py index 0d28fbd9c9..483276d1fe 100644 --- a/ravenframework/Samplers/Sampler.py +++ b/ravenframework/Samplers/Sampler.py @@ -30,6 +30,8 @@ from ..utils.graphStructure import evaluateModelsOrder from ..BaseClasses import BaseEntity, Assembler +_vectorPostfixFormat = '__RVEC__{ID}' + class Sampler(utils.metaclass_insert(abc.ABCMeta, BaseEntity), Assembler, InputDataUser): """ This is the base class for samplers @@ -66,6 +68,13 @@ class cls. Omitting this optional attribute will result in a single scalar value instead. Each of the values in the matrix or vector will be the same as the single sampled value. \nb A model interface must be prepared to handle non-scalar inputs to use this option.""") + variableInput.addParam("dims", InputTypes.StringListType, required=False, + descr=r"""names the indexes that correspond to the shape of this variable. Required when \xmlAttr{shape} + is provided. For example, with \xmlAttr{shape}=``2,3'', if the dimensions of the variable + are ``years'' and ``hours'', then \xmlAttr{dims}=``year,hour'' tells RAVEN that the first + dimension (with length 2) is called ``year'' and the second dimension (with length 3) is called + ``hour``. Order must be the same as provided for \xmlAttr{shape}. + \nb A model interface must be prepared to handle non-scalar inputs to use this option.""") distributionInput = InputData.parameterInputFactory("distribution", contentType=InputTypes.StringType, descr=r"""name of the distribution that is associated to this variable. Its name needs to be contained in the \xmlNode{Distributions} block explained @@ -212,7 +221,7 @@ def __init__(self): # determined through graph theory. # element 1 (instance): instance of the function to be used, it is created every time the sampler is initialized. self.values = {} # for each variable the current value {'var name':value} - self.variableShapes = {} # stores the dimensionality of each variable by name, as tuple e.g. (2,3) for [[#,#,#],[#,#,#]] + self.ndVariables = {} # stores the dimensionality (names and shapes) of each variable by name, as tuple e.g. shape = (2,3) for [[#,#,#],[#,#,#]] self.inputInfo = {} # depending on the sampler several different type of keywarded information could be present only one is mandatory, see below self.initSeed = None # if not provided the seed is randomly generated at the initialization of the sampler, the step can override the seed by sending in another one self.inputInfo['SampledVars' ] = self.values # this is the location where to get the values of the sampled variables @@ -330,6 +339,7 @@ def _readMoreXML(self, xmlNode): if self.type not in ['MonteCarlo', 'Metropolis']: if not self.toBeSampled: self.raiseAnError(IOError, f'<{self.type}> sampler named "{self.name}" requires at least one sampled !') + self._checkNDVariables() def _readMoreXMLbase(self, xmlNode): """ @@ -471,8 +481,17 @@ def _readInVariable(self, child, prefix): # store variable name for re-use varName = child.parameterValues['name'] # set shape if present - if 'shape' in child.parameterValues: - self.variableShapes[varName] = child.parameterValues['shape'] + shape = child.parameterValues.get('shape', None) + if shape is not None: + dims = child.parameterValues.get('dims', None) + # TODO move this check to an input check + # -> if "shape" is present, "dims" must be present as well! 
+ if dims is None: + self.raiseAnError(IOError, f'For variable "{varName}" the "shape" parameter was provided without the "dims" parameter!') + if len(shape) != len(dims): + self.raiseAnError(IOError, f'For variable "{varName}" the number of entries in "shape" and "dims" does not match!') + self.ndVariables[varName] = {'shape': shape, + 'dims': dims} # read subnodes for childChild in child.subparts: if childChild.getName() == 'distribution': @@ -481,20 +500,43 @@ def _readInVariable(self, child, prefix): self.raiseAnError(IOError, 'A sampled variable cannot have both a distribution and a function, or more than one of either!') else: foundDistOrFunc = True - # name of the distribution to sample - toBeSampled = childChild.value + distName = childChild.value varData = {} - varData['name'] = childChild.value + varData['name'] = distName # variable dimensionality - if 'dim' not in childChild.parameterValues: - dim = 1 - else: - dim = childChild.parameterValues['dim'] + dim = childChild.parameterValues.get('dim', 1) varData['dim'] = dim - # set up mapping for variable to distribution - self.variables2distributionsMapping[varName] = varData # flag distribution as needing to be sampled - self.toBeSampled[prefix + varName] = toBeSampled + # if a ND variable, loop over elements and set them each + # to be sampled as if they were independent variables. + # If not a ND variable, treat it like a length-1 array. + if varName in self.ndVariables: + shape = self.ndVariables.get(varName)['shape'] + else: + shape = 1 + totalIndices = np.zeros(shape).size + for i in range(totalIndices): + name = varName + if totalIndices > 1: + name += _vectorPostfixFormat.format(ID=str(i)) + self.toBeSampled[prefix + name] = distName + # set up mapping for variable to distribution + self.variables2distributionsMapping[name] = varData + # ##### OLD ##### + # # name of the distribution to sample + # toBeSampled = childChild.value + # varData = {} + # varData['name'] = childChild.value + # # variable dimensionality + # if 'dim' not in childChild.parameterValues: + # dim = 1 + # else: + # dim = childChild.parameterValues['dim'] + # varData['dim'] = dim + # # set up mapping for variable to distribution + # self.variables2distributionsMapping[varName] = varData + # # flag distribution as needing to be sampled + # self.toBeSampled[prefix + varName] = toBeSampled elif childChild.getName() == 'function': # can only have a function if doesn't already have a distribution or function if not foundDistOrFunc: @@ -713,6 +755,19 @@ def readSamplerInit(self,xmlNode): self.raiseAnError(IOError, f'Unknown tag {childChildChildChild.getName()}. Available are: initialGridDisc and tolerance!') self.NDSamplingParams[childChildChild.parameterValues['name']] = NDdistData + def _checkNDVariables(self): + """ + Provides an opportunity to check compatibility with and usage of N-dimensional variables. + By default, errors and provides notification to users. + @ In, None + @ Out, None + """ + # NOTE the base class Sampler will handle moving ND variables into individual variables + # using the self.toBeSampled dictionary mapping, so no specific action needs to be taken + # to enable ND variables for a sampler, aside from overriding this method in the sampler. 
+ if self.ndVariables: + self.raiseAnError(IOError, f'"{self.type}" sampler named "{self.name}" is not compatible with ND-variables (using the "shape" parameter!)') + #### GETTERS AND SETTERS #### def endJobRunnable(self): """ @@ -822,9 +877,9 @@ def _constantVariables(self): """ if len(self.constants) > 0: # we inject the constant variables into the SampledVars - self.inputInfo['SampledVars' ].update(self.constants) + self.inputInfo['SampledVars'].update(self.constants) # we consider that CDF of the constant variables is equal to 1 (same as its Pb Weight) - self.inputInfo['SampledVarsPb'].update(dict.fromkeys(self.constants.keys(),1.0)) + self.inputInfo['SampledVarsPb'].update(dict.fromkeys(self.constants.keys(), 1.0)) pbKey = ['ProbabilityWeight-'+key for key in self.constants] self.addMetaKeys(pbKey) self.inputInfo.update(dict.fromkeys(['ProbabilityWeight-'+key for key in self.constants], 1.0)) @@ -837,21 +892,73 @@ def _constantVariables(self): self.inputInfo['batchInfo']['batchRealizations'][b].update( dict.fromkeys(['ProbabilityWeight-'+key for key in self.constants], 1.0)) - def _expandVectorVariables(self): + def _formNDVariables(self): """ - Expands vector variables to fit the requested shape. + Formats ND variables to fit the requested shape. @ In, None @ Out, None """ - # by default, just repeat this value into the desired shape. May be overloaded by other samplers. - for var,shape in self.variableShapes.items(): - if self.inputInfo.get('batchMode',False): - for b in range(self.inputInfo['batchInfo']['nRuns']): - baseVal = self.inputInfo['batchInfo']['batchRealizations'][b]['SampledVars'][var] - self.inputInfo['batchInfo']['batchRealizations'][b]['SampledVars'][var] = np.ones(shape)*baseVal - else: - baseVal = self.inputInfo['SampledVars'][var] - self.inputInfo['SampledVars'][var] = np.ones(shape)*baseVal + if not self.inputInfo.get('batchMode', False): + rlzList = [self.inputInfo] + # for baseName, info in self.ndVariables.items(): + # shape = info['shape'] + # # collect all the values from the split variables + # values = [] + # entries = np.zeros(shape).size + # for i in range(entries): + # var = baseName + # if entries > 1: + # var += _vectorPostfixFormat.format(ID=str(i)) + # values.append(self.inputInfo['SampledVars'].pop(var)) + # # shape values into the requested format + # self.inputInfo['SampledVars'][baseName] = np.asarray(values).reshape(shape) + # # TODO does other data need extracting, like probability weights and etc? + else: + rlzList = self.inputInfo['batchInfo']['batchRealizations'] + for r, rlz in enumerate(rlzList): + for baseName, info in self.ndVariables.items(): + shape = info['shape'] + dims = info['dims'] + # collect all the values from the split variables + values = [] + entries = np.zeros(shape).size + for i in range(entries): + var = baseName + if entries > 1: + var += _vectorPostfixFormat.format(ID=str(i)) + value = self.values.pop(var) + values.append(value) + # shape values into the requested format + rlz['SampledVars'][baseName] = np.asarray(values).reshape(shape) + # update indexMap + if entries > 1: + # TODO do we need to add both to self.values and to rlz (inputInfo.sampledvars)? 
+ if r==0 and '_indexMap' not in self.values: + self.values['_indexMap'] = {} + if '_indexMap' not in rlz: + rlz['SampledVars']['_indexMap'] = {} + rlz['SampledVars']['_indexMap'][baseName] = dims + # check for missing index vars and add default values if needed + for d,dim in enumerate(dims): + if dim not in rlz['SampledVars']: + rlz['SampledVars'][dim] = np.arange(shape[d]) + self.raiseAWarning(f'Values for index "{dim}" not provided in Sampler; ' +\ + f'using default values (0 to {rlz["SampledVars"][dim][-1]}).') + + def _expandNDVariable(self, ndName, ndVals): + """ + Turns a name-NDarray pair into individual name-value pairs + @ In, ndName, name of (full ND array) variable + @ In, ndVals, np.ndarray, ND array of values + @ Out, expanded, dict, mapping of individual names to values + """ + # defined above, but for reference: + # _vectorPostfixFormat = '__RVEC__{ID}' + expanded = {} + for ID, val in enumerate(ndVals.flat): + name = ndName + _vectorPostfixFormat.format(ID=ID) + expanded[name] = val + return expanded def _evaluateFunctionsOrder(self): """ @@ -906,8 +1013,8 @@ def _incrementCounter(self): @ Out, None """ #since we are creating the input for the next run we increase the counter and global counter - self.counter +=1 - self.auxcnt +=1 + self.counter += 1 + self.auxcnt += 1 # prep to exit if over the limit if self.counter >= self.limit: self.raiseADebug('Sampling limit reached!') @@ -915,11 +1022,11 @@ def _incrementCounter(self): # FIXME, the following condition check is make sure that the require info is only printed once # when dump metadata to xml, this should be removed in the future when we have a better way to # dump the metadata - if self.counter >1: + if self.counter > 1: for key in self.entitiesToRemove: self.inputInfo.pop(key,None) if self.reseedAtEachIteration: - randomUtils.randomSeed(self.auxcnt-1) + randomUtils.randomSeed(self.auxcnt - 1) self.inputInfo['prefix'] = str(self.counter) def _performVariableTransform(self): @@ -984,7 +1091,8 @@ def generateInput(self,model,oldInput): only the code interface possesses the dictionary for reading the variable definition syntax @ In, model, model instance, it is the instance of a RAVEN model @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. etc) - @ Out, generateInput, tuple(0,list), list contains the new inputs -in reality it is the model that returns this; the Sampler generates the value to be placed in the input of the model. + @ Out, generateInput, tuple(0,list), list contains the new inputs + -in reality it is the model that returns this; the Sampler generates the value to be placed in the input of the model. The Out parameter depends on the results of generateInput If a new point is found, the default Out above is correct. 
If a restart point is found: @@ -993,28 +1101,21 @@ def generateInput(self,model,oldInput): self._incrementCounter() if model is not None: model.getAdditionalInputEdits(self.inputInfo) - self.localGenerateInput(model, oldInput) + ##### GENERATE SAMPLE ##### + self.localGenerateInput(model, oldInput) #NOTE oldInput is input to the Step, not a model input # split the sampled vars Pb among the different correlated variables self._reassignSampledVarsPbToFullyCorrVars() self._reassignPbWeightToCorrelatedVars() - ##### TRANSFORMATION ##### self._performVariableTransform() - ##### CONSTANT VALUES ###### self._constantVariables() - ##### REDUNDANT FUNCTIONALS ##### self._functionalVariables() - ##### VECTOR VARS ##### - self._expandVectorVariables() - ##### RESET DISTRIBUTIONS WITH MEMORY ##### + self._formNDVariables() + # reset distribution memory for key in self.distDict: if self.distDict[key].getMemory(): self.distDict[key].reset() - ##### RESTART ##### + ##### CHECK RESTART ##### _, inExisting = self._checkRestartForEvaluation() - # reformat metadata into acceptable format for dataojbect - # DO NOT format here, let that happen when a realization is made in collectOutput for each Model. Sampler doesn't care about this. - # self.inputInfo['ProbabilityWeight'] = np.atleast_1d(self.inputInfo['ProbabilityWeight']) - # self.inputInfo['prefix'] = np.atleast_1d(self.inputInfo['prefix']) #if not found or not restarting, we have a new point! if inExisting is None: # we have a new evaluation, so check its contents for consistency @@ -1039,7 +1140,6 @@ def generateInput(self,model,oldInput): rlz['inputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('input')) rlz['outputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('output')+self.restartData.getVars('indexes')) rlz['metadata'] = copy.deepcopy(self.inputInfo) # TODO need deepcopy only because inputInfo is on self - return 1, rlz def generateInputBatch(self, myInput, model, batchSize, projector=None): diff --git a/tests/framework/Samplers/NDSamples/gold/MC/solns.csv b/tests/framework/Samplers/NDSamples/gold/MC/solns.csv new file mode 100644 index 0000000000..cf7bc8399d --- /dev/null +++ b/tests/framework/Samplers/NDSamples/gold/MC/solns.csv @@ -0,0 +1,31 @@ +t,scalarIn,vectorIn,scalarOut,vectorOut,vectorConst1,vectorConst2 +0,1.3745401144,2.79654298439,1.88936052609,7.82065266352,1,a +1,1.3745401144,2.95071431178,1.88936052609,8.70671494977,2,b +2,1.3745401144,2.18343478771,1.88936052609,4.7673874722,3,c +3,1.3745401144,2.7319939385,1.88936052609,7.46379088001,4,d +4,1.3745401144,2.77969099762,1.88936052609,7.72668204227,5,e +5,1.3745401144,2.59865848641,1.88936052609,6.75302592898,6,f +6,1.3745401144,2.59685016158,1.88936052609,6.7436307617,7,g +7,1.3745401144,2.15601863855,1.88936052609,4.64841636979,8,h +8,1.3745401144,2.44583275762,1.88936052609,5.98209787823,9,i +9,1.3745401144,2.15599452382,1.88936052609,4.64831238674,10,j +0,1.09997492053,2.05808361109,1.2099448258,4.23570815023,1,a +1,1.09997492053,2.45924888795,1.2099448258,6.04790509287,2,b +2,1.09997492053,2.86617614885,1.2099448258,8.21496571621,3,c +3,1.09997492053,2.33370861139,1.2099448258,5.4461958829,4,d +4,1.09997492053,2.60111501152,1.2099448258,6.76579930314,5,e +5,1.09997492053,2.14286681431,1.2099448258,4.59187818387,6,f +6,1.09997492053,2.70807257847,1.2099448258,7.33365709024,7,g +7,1.09997492053,2.65088847341,1.2099448258,7.02720969847,8,h +8,1.09997492053,2.02058449877,1.2099448258,4.08276171667,9,i 
+9,1.09997492053,2.05641157647,1.2099448258,4.22882857184,10,j +0,1.96990984724,2.72199877159,3.88054480626,7.40927731253,1,a +1,1.96990984724,2.83244263656,3.88054480626,8.02273128942,2,b +2,1.96990984724,2.9385527144,3.88054480626,8.63509205533,3,c +3,1.96990984724,2.21233911189,3.88054480626,4.89444434598,4,d +4,1.96990984724,2.00077876472,3.88054480626,4.00311566535,5,e +5,1.96990984724,2.18182496707,3.88054480626,4.76036018695,6,f +6,1.96990984724,2.99221156444,3.88054480626,8.95333004638,7,g +7,1.96990984724,2.18340450995,3.88054480626,4.76725525408,8,h +8,1.96990984724,2.6174815075,3.88054480626,6.85120944213,9,i +9,1.96990984724,2.30424224103,3.88054480626,5.30953230537,10,j diff --git a/tests/framework/Samplers/VectorSamples/mc.xml b/tests/framework/Samplers/NDSamples/mc.xml similarity index 70% rename from tests/framework/Samplers/VectorSamples/mc.xml rename to tests/framework/Samplers/NDSamples/mc.xml index 4d49900ae7..f9b907d0a4 100644 --- a/tests/framework/Samplers/VectorSamples/mc.xml +++ b/tests/framework/Samplers/NDSamples/mc.xml @@ -1,7 +1,7 @@ - framework/Samplers/VectorSamples/MC + framework/Samplers/NDSamples/MC talbpaul 2018-03-21 Samplers.MonteCarlo @@ -37,6 +37,10 @@ 2 3 + + 3 + 4 + @@ -48,29 +52,32 @@ u1 - + u2 - 1,2,3,4,5,6,7,8,9,10 - a,b,c,d,e,f,g,h,i,j + + u3 + + 1,2,3,4,5 + a,b,c,d,e + 10,12,14,16,18 - scalarIn,vectorIn,scalarOut,vectorOut,t + scalarIn,vectorIn,ND_in + scalarOut,vectorOut - - scalarIn - OutputPlaceHolder - + - scalarIn,vectorIn + scalarIn,vectorIn,ND_in scalarOut,vectorOut,vectorConst1,vectorConst2 - vectorIn,vectorOut,vectorConst1,vectorConst2 + vectorIn,ND_in,vectorConst1,vectorConst2 + ND_in,vectorOut diff --git a/tests/framework/Samplers/VectorSamples/tests b/tests/framework/Samplers/NDSamples/tests similarity index 100% rename from tests/framework/Samplers/VectorSamples/tests rename to tests/framework/Samplers/NDSamples/tests diff --git a/tests/framework/Samplers/NDSamples/vectorInputModel.py b/tests/framework/Samplers/NDSamples/vectorInputModel.py new file mode 100644 index 0000000000..d9c3aee583 --- /dev/null +++ b/tests/framework/Samplers/NDSamples/vectorInputModel.py @@ -0,0 +1,25 @@ +# Copyright 2017 Battelle Energy Alliance, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def run(raven,inputDict): + """ + Simple mechanical operations to test inputting and outputting ND values. 
+ """ + raven.scalarOut = raven.scalarIn**2 + raven.vectorOut = [] + print('DEBUGG extmod input:') + for k, v in inputDict.items(): + print(k) + for _, value in enumerate(raven.y): + raven.vectorOut.append(raven.ND_in[value,:].sum()) diff --git a/tests/framework/Samplers/VectorSamples/gold/MC/solns.csv b/tests/framework/Samplers/VectorSamples/gold/MC/solns.csv deleted file mode 100644 index 5b66cb6afc..0000000000 --- a/tests/framework/Samplers/VectorSamples/gold/MC/solns.csv +++ /dev/null @@ -1,31 +0,0 @@ -t,scalarIn,vectorIn,scalarOut,vectorOut,vectorConst1,vectorConst2 -0,1.3745401144,2.79654298439,1.88936052609,7.82065266352,1,a -1,1.3745401144,2.79654298439,1.88936052609,7.82065266352,2,b -2,1.3745401144,2.79654298439,1.88936052609,7.82065266352,3,c -3,1.3745401144,2.79654298439,1.88936052609,7.82065266352,4,d -4,1.3745401144,2.79654298439,1.88936052609,7.82065266352,5,e -5,1.3745401144,2.79654298439,1.88936052609,7.82065266352,6,f -6,1.3745401144,2.79654298439,1.88936052609,7.82065266352,7,g -7,1.3745401144,2.79654298439,1.88936052609,7.82065266352,8,h -8,1.3745401144,2.79654298439,1.88936052609,7.82065266352,9,i -9,1.3745401144,2.79654298439,1.88936052609,7.82065266352,10,j -0,1.95071431178,2.18343478771,3.8052863262,4.7673874722,1,a -1,1.95071431178,2.18343478771,3.8052863262,4.7673874722,2,b -2,1.95071431178,2.18343478771,3.8052863262,4.7673874722,3,c -3,1.95071431178,2.18343478771,3.8052863262,4.7673874722,4,d -4,1.95071431178,2.18343478771,3.8052863262,4.7673874722,5,e -5,1.95071431178,2.18343478771,3.8052863262,4.7673874722,6,f -6,1.95071431178,2.18343478771,3.8052863262,4.7673874722,7,g -7,1.95071431178,2.18343478771,3.8052863262,4.7673874722,8,h -8,1.95071431178,2.18343478771,3.8052863262,4.7673874722,9,i -9,1.95071431178,2.18343478771,3.8052863262,4.7673874722,10,j -0,1.7319939385,2.77969099762,2.999803003,7.72668204227,1,a -1,1.7319939385,2.77969099762,2.999803003,7.72668204227,2,b -2,1.7319939385,2.77969099762,2.999803003,7.72668204227,3,c -3,1.7319939385,2.77969099762,2.999803003,7.72668204227,4,d -4,1.7319939385,2.77969099762,2.999803003,7.72668204227,5,e -5,1.7319939385,2.77969099762,2.999803003,7.72668204227,6,f -6,1.7319939385,2.77969099762,2.999803003,7.72668204227,7,g -7,1.7319939385,2.77969099762,2.999803003,7.72668204227,8,h -8,1.7319939385,2.77969099762,2.999803003,7.72668204227,9,i -9,1.7319939385,2.77969099762,2.999803003,7.72668204227,10,j From 618264c22e62d3a52d1e3de25eb22abd86e00350 Mon Sep 17 00:00:00 2001 From: talbpw Date: Thu, 24 Oct 2024 15:46:48 -0600 Subject: [PATCH 02/18] unit test for Realization as a dict --- .../Realizations/TestRealization.py | 387 ++++++++++++++++++ 1 file changed, 387 insertions(+) create mode 100644 tests/framework/unit_tests/Realizations/TestRealization.py diff --git a/tests/framework/unit_tests/Realizations/TestRealization.py b/tests/framework/unit_tests/Realizations/TestRealization.py new file mode 100644 index 0000000000..37eb616fd1 --- /dev/null +++ b/tests/framework/unit_tests/Realizations/TestRealization.py @@ -0,0 +1,387 @@ +# Copyright 2017 Battelle Energy Alliance, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" + This Module performs Unit Tests for the Realization objects. +""" + +import os, sys + +# find location of crow, message handler +ravenDir = os.path.abspath(os.path.join(*([os.path.dirname(__file__)]+[os.pardir]*4))) +sys.path.append(ravenDir) + +from ravenframework import Realizations + +print('Module undergoing testing:') +print(Realizations.Realization) +print('') + +results = {"pass":0,"fail":0} + +def checkFloat(comment,value,expected,tol=1e-10,update=True): + """ + This method is aimed to compare two floats given a certain tolerance + @ In, comment, string, a comment printed out if it fails + @ In, value, float, the value to compare + @ In, expected, float, the expected value + @ In, tol, float, optional, the tolerance + @ Out, res, bool, True if same + """ + if np.isnan(value) and np.isnan(expected): + res = True + elif np.isnan(value) or np.isnan(expected): + res = False + else: + res = abs(value - expected) <= tol + if update: + if not res: + print("checking float",comment,'|',value,"!=",expected) + results["fail"] += 1 + else: + results["pass"] += 1 + return res + +def checkTrue(comment,res,update=True): + """ + This method is a pass-through for consistency and updating + @ In, comment, string, a comment printed out if it fails + @ In, res, bool, the tested value + @ Out, res, bool, True if test + """ + if update: + if res: + results["pass"] += 1 + else: + print("checking bool",comment,'|',res,'is not True!') + results["fail"] += 1 + return res + +def checkSame(comment,value,expected,update=True): + """ + This method is aimed to compare two identical things + @ In, comment, string, a comment printed out if it fails + @ In, value, float, the value to compare + @ In, expected, float, the expected value + @ Out, res, bool, True if same + """ + res = value == expected + if update: + if res: + results["pass"] += 1 + else: + print("checking string",comment,'|',value,"!=",expected) + results["fail"] += 1 + return res + +def checkArray(comment,first,second,dtype,tol=1e-10,update=True): + """ + This method is aimed to compare two arrays + @ In, comment, string, a comment printed out if it fails + @ In, value, float, the value to compare + @ In, expected, float, the expected value + @ In, tol, float, optional, the tolerance + @ Out, res, bool, True if same + """ + res = True + if len(first) != len(second): + res = False + print("checking answer",comment,'|','lengths do not match:',len(first),len(second)) + else: + for i in range(len(first)): + if dtype == float: + pres = checkFloat('',first[i],second[i],tol,update=False) + elif dtype.__name__ in ('str','unicode'): + pres = checkSame('',first[i],second[i],update=False) + if not pres: + print('checking array',comment,'|','entry "{}" does not match: {} != {}'.format(i,first[i],second[i])) + res = False + if update: + if res: + results["pass"] += 1 + else: + results["fail"] += 1 + return res + +def checkRlz(comment,first,second,tol=1e-10,update=True): + """ + This method is aimed to compare two realization + @ In, comment, string, a comment printed out if it fails + @ In, value, float, the value to compare + @ In, expected, float, the expected value + @ In, tol, float, optional, the tolerance + @ Out, res, bool, True if same + """ + res = True + if len(first) != len(second): + res = False + print("checking answer",comment,'|','lengths do not match:',len(first),len(second)) + else: + for key,val in first.items(): + if 
isinstance(val,float): + pres = checkFloat('',val,second[key],tol,update=False) + elif type(val).__name__ in ('str','unicode','str_','unicode_'): + pres = checkSame('',val,second[key][0],update=False) + elif isinstance(val,xr.DataArray): + if isinstance(val.item(0),(float,int)): + pres = (val - second[key]).sum()<1e-20 #necessary due to roundoff + else: + pres = val.equals(second[key]) + else: + raise TypeError(type(val)) + if not pres: + print('checking dict',comment,'|','entry "{}" does not match: {} != {}'.format(key,first[key],second[key])) + res = False + if update: + if res: + results["pass"] += 1 + else: + results["fail"] += 1 + return res + +def checkNone(comment,entry,update=True): + """ + Tests if the entry identifies as None. + @ In, comment, str, comment to print if failed + @ In, entry, object, object to test + @ In, update, bool, optional, if True then updates results + @ Out, None + """ + res = entry is None + if update: + if res: + results["pass"] += 1 + else: + print("checking answer",comment,'|','"{}" is not None!'.format(entry)) + results["fail"] += 1 + +def checkFails(comment,errstr,function,update=True,args=None,kwargs=None): + """ + Tests if function fails as expected + @ In, comment, str, comment to print if failed + @ In, errstr, str, expected error string + @ In, function, method, method to run + @ In, update, bool, optional, if True then updates results + @ In, args, list, arguments to function + @ In, kwargs, dict, keywords arguments to function + @ Out, res, bool, result (True if passed) + """ + print('Error testing ...') + if args is None: + args = [] + if kwargs is None: + kwargs = {} + try: + function(*args,**kwargs) + res = False + msg = 'Function call did not error!' + except Exception as e: + res = checkSame('',e.args[0],errstr,update=False) + if not res: + msg = 'Unexpected error message. \n Received: "{}"\n Expected: "{}"'.format(e.args[0],errstr) + if update: + if res: + results["pass"] += 1 + print(' ... end Error testing (PASSED)') + else: + print("checking error",comment,'|',msg) + results["fail"] += 1 + print(' ... 
end Error testing (FAILED)') + print('') + return res + + + +####### +# +# Quacks like a dict +# +rlz = Realizations.Realization() + +# setitem +a = 3 +b = 42 +pi = 3.14159 +rlz['a'] = a +rlz['b'] = b +rlz['pi'] = pi +rlz[5] = 'c' + + +# membership, contains +for member in ['a', 'b', 'pi', 5]: + if not member in rlz: + print(f'checking member "{member}", got False expected True!') + results['fail'] += 1 + else: + results['pass'] += 1 + +for nonmember in ['d', 2, 1.618, 'values']: + if nonmember in rlz: + print(f'checking member "{nonmember}", got True expected False!') + results['fail'] += 1 + else: + results['pass'] += 1 + + +# getitem +gb = rlz['b'] +if not gb == 42: + print(f'checking getitem "b", got "{gb}" expected "{b}"!') + results['fail'] += 1 +else: + results['pass'] += 1 + +ga = rlz['a'] +if not ga == 3: + print(f'checking getitem "a", got "{ga}" expected "{a}"!') + results['fail'] += 1 +else: + results['pass'] += 1 + +gp = rlz['pi'] +if not gp == 3.14159: + print(f'checking getitem "pi", got "{gp}" expected "{pi}"!') + results['fail'] += 1 +else: + results['pass'] += 1 + +g5 = rlz[5] +if not g5 == 'c': + print(f'checking getitem "pi", got "{g5}" expected "c"!') + results['fail'] += 1 +else: + results['pass'] += 1 + + +# get +ga = rlz.get('a') +if not ga == 3: + print(f'checking get "a", got "{ga}" expected "{a}"!') + results['fail'] += 1 +else: + results['pass'] += 1 + +gd = rlz.get('d', 15) +if not gd == 15: + print(f'checking get default, got "{gd}" expected "{15}"!') + results['fail'] += 1 +else: + results['pass'] += 1 + + +# len +if not len(rlz) == 4: + print(f'checking len, got "{len(rlz)}" expected "{3}"!') + results['fail'] += 1 +else: + results['pass'] += 1 + + +# delitem +del rlz['b'] +if 'b' in rlz: + print('checking del, failed to remove "b"!') + results['fail'] += 1 +else: + results['pass'] += 1 + + +# iter +expk = ['a', 'pi', 5] +for i, k in enumerate(rlz): + if k != expk[i]: + print(f'checking iter[{i}], got "{k}" expected "{expk[i]}"!') + results['fail'] += 1 + else: + results['pass'] += 1 + + +# keys +for i, k in enumerate(rlz.keys()): + if k != expk[i]: + print(f'checking keys[{i}], got "{k}" expected "{expk[i]}"!') + results['fail'] += 1 + else: + results['pass'] += 1 + + +# values +expv = [3, 3.14159, 'c'] +for i, v in enumerate(rlz.values()): + if v != expv[i]: + print(f'checking values[{i}], got "{v}" expected "{expv[i]}"!') + results['fail'] += 1 + else: + results['pass'] += 1 + + +# items +for i, (k, v) in enumerate(rlz.items()): + if (k != expk[i]) or (v != expv[i]): + print(f'checking items[{i}], got "({k}, {v})" expected ("{expk[i]}, {expv[i]}")!') + results['fail'] += 1 + else: + results['pass'] += 1 + + +# update +new = {'a': 30, # update old entry + 'b': 420, # add back old entry in new position + 5: 'c2', # update old entry + 'new': 372} # new entry +rlz.update(new) +expk = ['a', 'pi', 5, 'b', 'new'] +expv = [ 30, 3.14159, 'c2', 420, 372] +for i, (k, v) in enumerate(rlz.items()): + if (k != expk[i]) or (v != expv[i]): + print(f'checking update[{i}], got "({k}, {v})" expected ("{expk[i]}, {expv[i]}")!') + results['fail'] += 1 + else: + results['pass'] += 1 + +# pop +val = rlz.pop(5) +if val != 'c2': + print(f'checking pop[5], got "{val}" expected "c")!') + results['fail'] += 1 +else: + results['pass'] += 1 +if 5 in rlz: + print('checking pop[5], failed to remove 5!') + results['fail'] += 1 +else: + results['pass'] += 1 + + + +####### +# +# Results +# + +print(results) + + +sys.exit(results["fail"]) +""" + + framework.test_realization + talbpaul + 
2024-10-23 + Realization + + This test is a Unit Test for the Realization class. + + +""" From f9a9c3e60d632c43bcd6fdea895164ee27f93458 Mon Sep 17 00:00:00 2001 From: talbpw Date: Tue, 29 Oct 2024 10:30:01 -0600 Subject: [PATCH 03/18] stash for devel merge --- .../Realizations/BatchRealization.py | 81 +++++ ravenframework/Realizations/Realization.py | 26 +- ravenframework/Realizations/__init__.py | 5 +- ravenframework/Samplers/MonteCarlo.py | 35 +- ravenframework/Samplers/Sampler.py | 320 ++++++++---------- ravenframework/Steps/MultiRun.py | 8 +- .../Realizations/TestRealization.py | 175 +--------- 7 files changed, 276 insertions(+), 374 deletions(-) create mode 100644 ravenframework/Realizations/BatchRealization.py diff --git a/ravenframework/Realizations/BatchRealization.py b/ravenframework/Realizations/BatchRealization.py new file mode 100644 index 0000000000..c962b625b6 --- /dev/null +++ b/ravenframework/Realizations/BatchRealization.py @@ -0,0 +1,81 @@ +# Copyright 2017 Battelle Energy Alliance, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + Collection of Realizations for convenient co-packaging +""" +from . import Realization + +class BatchRealization: + """ + A container for groups of Realization objects, that should mostly invisibly work like a realization + """ + def __init__(self, batchSize): + """ + Constructor. + @ In, None + @ Out, None + """ + # TODO are any of these shared across realizations? + # self._values = {} # mapping of variables to their values + # self.inputInfo = {'SampledVars': {}, # additional information about this realization + # 'SampledVarsPb': {}, + # 'crowDist': {} + # } + # self.indexMap = {} # information about dimensionality of variables + self.batchSize = batchSize # number of realizations that are part of this object + self._realizations = [Realization() for _ in range(min(batchSize, 1))] + + + ######## + # + # other useful methods + # + + ######## + # + # dict-like members + # + def __len__(self): + """ + Python built-in for realization length. + @ In, None + @ Out, len, int, number of realizations in batch + """ + return len(self._realizations) + + def __getitem__(self, index): + """ + Python built-in for acquiring values. + @ In, index, int, index of desired item + @ Out, item, any, contents of realization corresponding to variable + """ + return self._realizations[index] + + def __setitem__(self, index, value): + """ + Python built-in for setting values. 
+ @ In, index, int, index of desired item + @ In, value, any, corresponding value + @ Out, None + """ + raise IndexError('Tried to overwrite a Realization object in a Batch!') + + def pop(self, *args): + """ + Python built-in for removing and returning entry in realization + @ In, None + @ Out, pop, any, value of corresponding index + """ + return self._realizations.pop(*args) + diff --git a/ravenframework/Realizations/Realization.py b/ravenframework/Realizations/Realization.py index 75866a0947..3ccf0031d2 100644 --- a/ravenframework/Realizations/Realization.py +++ b/ravenframework/Realizations/Realization.py @@ -28,9 +28,18 @@ def __init__(self): @ Out, None """ self._values = {} # mapping of variables to their values - self.inputInfo = {} # additional information about this realization + self.inputInfo = {'SampledVars': {}, # additional information about this realization + 'SampledVarsPb': {}, + 'crowDist': {} + } self.indexMap = {} # information about dimensionality of variables self.labels = {} # custom labels for tracking, set externally + self.batchSize = 0 # not a batch, easy way to check + + ######## + # + # other useful methods + # ######## # @@ -70,10 +79,6 @@ def __delitem__(self, key): # TODO also remove from inputInfo and indexMap? del self._values[key] - # TODO needed? Used when getitem is called and it's not present - #def __missing__(self, key): - # return self._values.__missing__(key) - def __iter__(self): """ Python built-in for providing iterator for keys. @@ -132,4 +137,15 @@ def pop(self, *args): # TODO extend to other info dicts? return self._values.pop(*args) + def get(self, key, default=None): + """ + Accessor for acquiring values. + @ In, key, str, variable name + @ Out, item, any, contents of realization corresponding to variable + """ + if default is None: + return self._values.get(key) + else: + return self._values.get(key, default) + diff --git a/ravenframework/Realizations/__init__.py b/ravenframework/Realizations/__init__.py index 7f7a7d8130..d08113ed66 100644 --- a/ravenframework/Realizations/__init__.py +++ b/ravenframework/Realizations/__init__.py @@ -18,7 +18,4 @@ # These lines ensure that we do not have to do something like: # 'from Samplers.Sampler import Sampler' outside of this submodule from .Realization import Realization - -# from .Batches import BatchRealization - -#TODO do we need this? from .Factory import factory +from .BatchRealization import BatchRealization diff --git a/ravenframework/Samplers/MonteCarlo.py b/ravenframework/Samplers/MonteCarlo.py index 5c7e80524d..3ef03053f0 100644 --- a/ravenframework/Samplers/MonteCarlo.py +++ b/ravenframework/Samplers/MonteCarlo.py @@ -109,13 +109,13 @@ def _checkNDVariables(self): """ # MonteCarlo supports ND variables, and doesn't need to check anything to operate as expected. - def localGenerateInput(self, model, myInput): + def localGenerateInput(self, rlz, model, modelInput): """ - Provides the next sample to take. - After this method is called, the self.inputInfo should be ready to be sent - to the model + Fills the Realization with values of the next sample + After this method is called, rlz should be ready to be sent to the model + @ In, rlz, Realization, mapping to populate with sample values @ In, model, model instance, an instance of a model - @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) + @ In, modelInput, list, a list of the Step inputs for the model (e.g. 
files) @ Out, None """ # create values dictionary @@ -145,9 +145,9 @@ def localGenerateInput(self, model, myInput): else: rvsnum = self.distDict[key].rvs() for kkey in key.split(','): - self.values[kkey] = np.atleast_1d(rvsnum)[0] - self.inputInfo['SampledVarsPb'][key] = self.distDict[key].pdf(rvsnum) - self.inputInfo['ProbabilityWeight-' + key] = 1. + rlz[kkey] = np.atleast_1d(rvsnum)[0] + rlz.inputInfo['SampledVarsPb'][key] = self.distDict[key].pdf(rvsnum) + rlz.inputInfo['ProbabilityWeight-' + key] = 1. elif totDim > 1: if reducedDim == 1: if self.samplingType is None: @@ -162,25 +162,26 @@ def localGenerateInput(self, model, myInput): if reducedDim > len(coordinate): self.raiseAnError(IOError, "The dimension defined for variables drew from the multivariate normal distribution is exceeded by the dimension used in Distribution (MultivariateNormal) ") probabilityValue = self.distDict[key].pdf(coordinate) - self.inputInfo['SampledVarsPb'][key] = probabilityValue + rlz.inputInfo['SampledVarsPb'][key] = probabilityValue for var in self.distributions2variablesMapping[dist]: varID = utils.first(var.keys()) varDim = var[varID] for kkey in varID.strip().split(','): - self.values[kkey] = np.atleast_1d(rvsnum)[varDim-1] - self.inputInfo[f'ProbabilityWeight-{dist}'] = 1. + rlz[kkey] = np.atleast_1d(rvsnum)[varDim-1] + rlz.inputInfo[f'ProbabilityWeight-{dist}'] = 1. else: self.raiseAnError(IOError, "Total dimension for given distribution should be >= 1") - if len(self.inputInfo['SampledVarsPb'].keys()) > 0: - self.inputInfo['PointProbability'] = reduce(mul, self.inputInfo['SampledVarsPb'].values()) + if len(rlz.inputInfo['SampledVarsPb'].keys()) > 0: + rlz.inputInfo['PointProbability'] = reduce(mul, rlz.inputInfo['SampledVarsPb'].values()) else: - self.inputInfo['PointProbability'] = 1.0 + rlz.inputInfo['PointProbability'] = 1.0 if self.samplingType == 'uniform': - self.inputInfo['ProbabilityWeight' ] = weight + rlz.inputInfo['ProbabilityWeight'] = weight else: - self.inputInfo['ProbabilityWeight' ] = 1.0 # MC weight is 1/N => weight is one - self.inputInfo['SamplerType'] = 'MonteCarlo' + rlz.inputInfo['ProbabilityWeight'] = 1.0 # MC weight is 1/N => weight is one + rlz.inputInfo['SamplerType'] = 'MonteCarlo' + return rlz def _localHandleFailedRuns(self, failedRuns): """ diff --git a/ravenframework/Samplers/Sampler.py b/ravenframework/Samplers/Sampler.py index 483276d1fe..da22fa4cb3 100644 --- a/ravenframework/Samplers/Sampler.py +++ b/ravenframework/Samplers/Sampler.py @@ -29,6 +29,7 @@ from ..utils import utils,randomUtils,InputData, InputTypes from ..utils.graphStructure import evaluateModelsOrder from ..BaseClasses import BaseEntity, Assembler +from ..Realizations import BatchRealization _vectorPostfixFormat = '__RVEC__{ID}' @@ -220,16 +221,16 @@ def __init__(self): # is detected, the order is just random, otherwise the order is # determined through graph theory. # element 1 (instance): instance of the function to be used, it is created every time the sampler is initialized. - self.values = {} # for each variable the current value {'var name':value} + # TODO REMOVE self.values = {} # for each variable the current value {'var name':value} self.ndVariables = {} # stores the dimensionality (names and shapes) of each variable by name, as tuple e.g. 
shape = (2,3) for [[#,#,#],[#,#,#]] - self.inputInfo = {} # depending on the sampler several different type of keywarded information could be present only one is mandatory, see below + # TODO REMOVE self.inputInfo = {} # depending on the sampler several different type of keywarded information could be present only one is mandatory, see below + self.samplerInfo = {} # depending on the sampler several different type of keywarded information could be present only one is mandatory, see below self.initSeed = None # if not provided the seed is randomly generated at the initialization of the sampler, the step can override the seed by sending in another one - self.inputInfo['SampledVars' ] = self.values # this is the location where to get the values of the sampled variables - self.inputInfo['SampledVarsPb' ] = {} # this is the location where to get the probability of the sampled variables - self.inputInfo['crowDist'] = {} # Stores a dictionary that contains the information to create a crow distribution. Stored as a json object + # TODO REMOVE self.inputInfo['SampledVars' ] = self.values # this is the location where to get the values of the sampled variables + # TODO REMOVE self.inputInfo['SampledVarsPb' ] = {} # this is the location where to get the probability of the sampled variables + # TODO REMOVE self.inputInfo['crowDist'] = {} # Stores a dictionary that contains the information to create a crow distribution. Stored as a json object self.constants = {} # In this dictionary self.reseedAtEachIteration = False # Logical flag. True if every newer evaluation is performed after a new reseeding - self.FIXME = False # FIXME flag self.printTag = self.type # prefix for all prints (sampler type) self.restartData = None # presampled points to restart from @@ -264,7 +265,7 @@ def __init__(self): self.transformationMethod = {} # transformation method used in variablesTransformation node {'modelName':method} self.entitiesToRemove = [] # This variable is used in order to make sure the transformation info is printed once in the output xml file. - def _generateDistributions(self, availableDist, availableFunc): + def _generateDistributions(self, rlz, availableDist, availableFunc): """ Generates the distributions and functions. @ In, availableDist, dict, dict of distributions @@ -277,7 +278,7 @@ def _generateDistributions(self, availableDist, availableFunc): if self.toBeSampled[key] not in availableDist: self.raiseAnError(IOError, f'Distribution {self.toBeSampled[key]} not found among available distributions (check input)!') self.distDict[key] = availableDist[self.toBeSampled[key]] - self.inputInfo['crowDist'][key] = json.dumps(self.distDict[key].getCrowDistDict()) + rlz.inputInfo['crowDist'][key] = json.dumps(rlz.distDict[key].getCrowDistDict()) for key, val in self.dependentSample.items(): if val not in availableFunc.keys(): self.raiseAnError(ValueError, f'Function {val} was not found among the available functions:', availableFunc.keys()) @@ -681,7 +682,8 @@ def initialize(self, externalSeeding=None, solutionExport=None): transformDict = {} transformDict['type'] = self.distDict[variable.strip()].type transformDict['transformationMatrix'] = self.distDict[variable.strip()].transformationMatrix() - self.inputInfo[f'transformation-{distName}'] = transformDict + # FIXME not inputInfo, where should this go? 
+ self.samplerInfo[f'transformation-{distName}'] = transformDict self.entitiesToRemove.append(f'transformation-{distName}') # Register expected metadata @@ -690,6 +692,14 @@ def initialize(self, externalSeeding=None, solutionExport=None): meta += ['ProbabilityWeight-'+ key for key in var.split(",")] self.addMetaKeys(meta) + def getBatchSize(self): + """ + Returns the size of batches to use for this Sampler. Default is 0. + @ In, None + @ Out, size, int, 0 + """ + return 0 + def localGetInitParams(self): """ Method used to export to the printer in the base class the additional PERMANENT your local class have @@ -788,16 +798,11 @@ def getCurrentSetting(self): and each parameter's initial value as the dictionary values """ paramDict = {} - paramDict['counter' ] = self.counter - paramDict['initial seed' ] = self.initSeed - for key in self.inputInfo: - if key!='SampledVars': - paramDict[key] = self.inputInfo[key] - else: - for var in self.inputInfo['SampledVars'].keys(): - paramDict['Variable: '+var+' has value'] = paramDict[key][var] + paramDict['counter'] = self.counter + paramDict['initial seed'] = self.initSeed + for key in self.samplerInfo: + paramDict[key] = self.samplerInfo[key] paramDict.update(self.localGetCurrentSetting()) - return paramDict def getJobsToEnd(self, clear=False): @@ -869,37 +874,30 @@ def _checkRestartForEvaluation(self): return index, inExisting - def _constantVariables(self): + def _constantVariables(self, rlzBatch): """ Method to set the constant variables into the inputInfo dictionary - @ In, None + @ In, rlzBatch, BatchRealization, batch of mapping of sampled vars to values @ Out, None """ if len(self.constants) > 0: - # we inject the constant variables into the SampledVars - self.inputInfo['SampledVars'].update(self.constants) - # we consider that CDF of the constant variables is equal to 1 (same as its Pb Weight) - self.inputInfo['SampledVarsPb'].update(dict.fromkeys(self.constants.keys(), 1.0)) - pbKey = ['ProbabilityWeight-'+key for key in self.constants] - self.addMetaKeys(pbKey) - self.inputInfo.update(dict.fromkeys(['ProbabilityWeight-'+key for key in self.constants], 1.0)) - # update in batch mode - if self.inputInfo.get('batchMode',False): - for b in range(self.inputInfo['batchInfo']['nRuns']): - self.inputInfo['batchInfo']['batchRealizations'][b]['SampledVars'].update(self.constants) - self.inputInfo['batchInfo']['batchRealizations'][b]['SampledVarsPb'].update(dict.fromkeys( - self.constants.keys(), 1.0)) - self.inputInfo['batchInfo']['batchRealizations'][b].update( - dict.fromkeys(['ProbabilityWeight-'+key for key in self.constants], 1.0)) - - def _formNDVariables(self): + for rlz in rlzBatch: + # we inject the constant variables into the SampledVars + rlz.update(self.constants) + # we consider that CDF of the constant variables is equal to 1 (same as its Pb Weight) + rlz.inputInfo['SampledVarsPb'].update(dict.fromkeys(self.constants.keys(), 1.0)) + pbKey = ['ProbabilityWeight-'+key for key in self.constants] + self.addMetaKeys(pbKey) + rlz.inputInfo.update(dict.fromkeys(['ProbabilityWeight-'+key for key in self.constants], 1.0)) + + def _formNDVariables(self, rlzBatch): """ Formats ND variables to fit the requested shape. 
- @ In, None + @ In, rlzBatch, BatchRealization, batch of mapping of sampled vars to values @ Out, None """ - if not self.inputInfo.get('batchMode', False): - rlzList = [self.inputInfo] + for rlz in rlzBatch: + # TODO REMOVE # for baseName, info in self.ndVariables.items(): # shape = info['shape'] # # collect all the values from the split variables @@ -913,9 +911,6 @@ def _formNDVariables(self): # # shape values into the requested format # self.inputInfo['SampledVars'][baseName] = np.asarray(values).reshape(shape) # # TODO does other data need extracting, like probability weights and etc? - else: - rlzList = self.inputInfo['batchInfo']['batchRealizations'] - for r, rlz in enumerate(rlzList): for baseName, info in self.ndVariables.items(): shape = info['shape'] dims = info['dims'] @@ -926,24 +921,20 @@ def _formNDVariables(self): var = baseName if entries > 1: var += _vectorPostfixFormat.format(ID=str(i)) - value = self.values.pop(var) + value = rlz.values.pop(var) values.append(value) # shape values into the requested format - rlz['SampledVars'][baseName] = np.asarray(values).reshape(shape) + rlz[baseName] = np.asarray(values).reshape(shape) # update indexMap if entries > 1: # TODO do we need to add both to self.values and to rlz (inputInfo.sampledvars)? - if r==0 and '_indexMap' not in self.values: - self.values['_indexMap'] = {} - if '_indexMap' not in rlz: - rlz['SampledVars']['_indexMap'] = {} - rlz['SampledVars']['_indexMap'][baseName] = dims + rlz.indexMap[baseName] = dims # check for missing index vars and add default values if needed for d,dim in enumerate(dims): - if dim not in rlz['SampledVars']: - rlz['SampledVars'][dim] = np.arange(shape[d]) + if dim not in rlz: + rlz[dim] = np.arange(shape[d]) self.raiseAWarning(f'Values for index "{dim}" not provided in Sampler; ' +\ - f'using default values (0 to {rlz["SampledVars"][dim][-1]}).') + f'using default values (0 to {rlz[dim][-1]}).') def _expandNDVariable(self, ndName, ndVals): """ @@ -987,24 +978,17 @@ def _evaluateFunctionsOrder(self): ' -> '.join([f"variable:{var} | function: {self.funcDict[var].instance.name}" for var in self.variableFunctionExecutionList])) - def _functionalVariables(self): + def _functionalVariables(self, rlzBatch): """ Evaluates variables that are functions of other input variables. 
- @ In, None + @ In, rlzBatch, BatchRealization, batch of mapping of sampled vars to values @ Out, None """ - # generate the function variable values for var in self.variableFunctionExecutionList: - if self.inputInfo.get('batchMode',False): - for b in range(self.inputInfo['batchInfo']['nRuns']): - values = self.inputInfo['batchInfo']['batchRealizations'][b]['SampledVars'] - test=self.funcDict[var].instance.evaluate(self.funcDict[var].methodName,values) - for corrVar in var.split(","): - self.inputInfo['batchInfo']['batchRealizations'][b]['SampledVars'][corrVar.strip()] = test - else: - test=self.funcDict[var].instance.evaluate(self.funcDict[var].methodName, self.values) + for rlz in rlzBatch: + funcEval = self.funcDict[var].instance.evaluate(self.funcDict[var].methodName, rlz) for corrVar in var.split(","): - self.values[corrVar.strip()] = test + rlz[corrVar.strip()] = funcEval def _incrementCounter(self): """ @@ -1024,115 +1008,117 @@ def _incrementCounter(self): # dump the metadata if self.counter > 1: for key in self.entitiesToRemove: - self.inputInfo.pop(key,None) + self.samplerInfo.pop(key,None) if self.reseedAtEachIteration: randomUtils.randomSeed(self.auxcnt - 1) - self.inputInfo['prefix'] = str(self.counter) + self.samplerInfo['prefix'] = str(self.counter) - def _performVariableTransform(self): + def _performVariableTransform(self, rlzBatch): """ Performs variable transformations if existing. - @ In, None + @ In, rlzBatch, BatchRealization, batch of maps for vars to values @ Out, None """ - # add latent variables and original variables to self.inputInfo if self.variablesTransformationDict: - for dist,var in self.variablesTransformationDict.items(): - if self.transformationMethod[dist] == 'pca': - self.pcaTransform(var,dist) - else: - self.raiseAnError(NotImplementedError, f'transformation method is not yet implemented for {self.transformationMethod[dist]} method') + for rlz in rlzBatch: + # add latent variables and original variables to rlz.inputInfo + for dist, var in self.variablesTransformationDict.items(): + if self.transformationMethod[dist] == 'pca': + self.pcaTransform(rlz, var, dist) + else: + self.raiseAnError(NotImplementedError, f'transformation method is not yet implemented for {self.transformationMethod[dist]} method') - def _reassignSampledVarsPbToFullyCorrVars(self): + def _reassignSampledVarsPbToFullyCorrVars(self, rlzBatch): """ Method to reassign sampledVarsPb to the fully correlated variables - @ In, None + @ In, rlzBatch, BatchRealization, batch of maps for vars to values @ Out, None """ - #Need keys as list because modifying self.inputInfo['SampledVarsPb'] - keys = list(self.inputInfo['SampledVarsPb'].keys()) - fullyCorrVars = {s: self.inputInfo['SampledVarsPb'].pop(s) for s in keys if "," in s} - # assign the SampledVarsPb to the fully correlated vars - for key in fullyCorrVars: - for kkey in key.split(","): - if not self.inputInfo.get('batchMode', False): - self.inputInfo['SampledVarsPb'][kkey] = fullyCorrVars[key] - else: - for b in range(self.inputInfo['nRuns']): - self.inputInfo['batchInfo']['batchRealizations'][b]['SampledVarsPb'][kkey] = fullyCorrVars[key] + for rlz in rlzBatch: + #Need keys as list because modifying rlz.inputInfo['SampledVarsPb'] + keys = list(rlz.inputInfo['SampledVarsPb'].keys()) + fullyCorrVars = {s: rlz.inputInfo['SampledVarsPb'].pop(s) for s in keys if "," in s} + # assign the SampledVarsPb to the fully correlated vars + for key in fullyCorrVars: + for kkey in key.split(","): + rlz.inputInfo['SampledVarsPb'][kkey] = 
fullyCorrVars[key] - def _reassignPbWeightToCorrelatedVars(self): + def _reassignPbWeightToCorrelatedVars(self, rlzBatch): """ Method to reassign probability weight to the correlated variables - @ In, None + @ In, rlzBatch, BatchRealization, batch of maps for vars to values @ Out, None """ - # collect initial weights - pbWeights = {key:value for key, value in self.inputInfo.items() if 'ProbabilityWeight' in key} - for varName, varInfo in self.variables2distributionsMapping.items(): - # Handle ND Case - if varInfo['totDim'] > 1: - distName = self.variables2distributionsMapping[varName]['name'] - pbWeights[f'ProbabilityWeight-{varName}'] = self.inputInfo[f'ProbabilityWeight-{distName}'] - if "," in varName: - for subVarName in varName.split(","): - pbWeights[f'ProbabilityWeight-{subVarName.strip()}'] = pbWeights[f'ProbabilityWeight-{varName}'] - # update pbWeights - self.inputInfo.update(pbWeights) - # if batchmode, update batch - if self.inputInfo.get('batchMode',False): - for b in range(self.inputInfo['batchInfo']['nRuns']): - self.inputInfo['batchInfo']['batchRealizations'][b].update(pbWeights) - - def generateInput(self,model,oldInput): + for rlz in rlzBatch: + # collect initial weights + pbWeights = {key:value for key, value in rlz.inputInfo.items() if 'ProbabilityWeight' in key} + for varName, varInfo in self.variables2distributionsMapping.items(): + # Handle ND Case + if varInfo['totDim'] > 1: + distName = self.variables2distributionsMapping[varName]['name'] + pbWeights[f'ProbabilityWeight-{varName}'] = rlz.inputInfo[f'ProbabilityWeight-{distName}'] + if "," in varName: + for subVarName in varName.split(","): + pbWeights[f'ProbabilityWeight-{subVarName.strip()}'] = pbWeights[f'ProbabilityWeight-{varName}'] + # update pbWeights + rlz.inputInfo.update(pbWeights) + + def generateInput(self, model, modelInput): """ This method has to be overwritten to provide the specialization for the specific sampler The model instance in might be needed since, especially for external codes, only the code interface possesses the dictionary for reading the variable definition syntax @ In, model, model instance, it is the instance of a RAVEN model - @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. etc) - @ Out, generateInput, tuple(0,list), list contains the new inputs - -in reality it is the model that returns this; the Sampler generates the value to be placed in the input of the model. - The Out parameter depends on the results of generateInput - If a new point is found, the default Out above is correct. - If a restart point is found: - @ Out, generateInput, tuple(int,dict), (1,realization dictionary) - """ - self._incrementCounter() + @ In, modelInput, list, a list of the original Step inputs for the model (e.g. files) + @ Out, found, int, number indicating the result of sampling this variable (e.g., 0 new sample, 1 from restart) + @ Out, rlz, Realization, mapping from variables to values for sample + @ Out, modelInput, potentially perturbed? 
original inputs for model, or None if taken from restart + """ if model is not None: - model.getAdditionalInputEdits(self.inputInfo) + model.getAdditionalInputEdits(rlz.inputInfo) ##### GENERATE SAMPLE ##### - self.localGenerateInput(model, oldInput) #NOTE oldInput is input to the Step, not a model input - # split the sampled vars Pb among the different correlated variables - self._reassignSampledVarsPbToFullyCorrVars() - self._reassignPbWeightToCorrelatedVars() - self._performVariableTransform() - self._constantVariables() - self._functionalVariables() - self._formNDVariables() + # instantiate a batch of data carrier realizations + batchSize = self.getBatchSize() + rlzBatch = BatchRealization(batchSize) + if batchSize == 0: + # this means the current sampler does not know how to handle batching, so do it one at a time + for rlz in rlzBatch: + self._incrementCounter() + self.localGenerateInput(rlz, model, modelInput) + else: + self._incrementCounter() # TODO FIXME for GA, need a batch counter + self.localGenerateInput(rlzBatch, model, modelInput) + # this sampler knows how to handle batching, so we do it all at once + # correlated variables + self._reassignSampledVarsPbToFullyCorrVars(rlzBatch) + self._reassignPbWeightToCorrelatedVars(rlzBatch) + # variable transforms + self._performVariableTransform(rlzBatch) + # constants and functioned values + self._constantVariables(rlzBatch) + self._functionalVariables(rlzBatch) + # ND variables + self._formNDVariables(rlzBatch) + # merge sampler metadata + rlz.inputInfo.update(self.samplerInfo) # reset distribution memory for key in self.distDict: if self.distDict[key].getMemory(): self.distDict[key].reset() ##### CHECK RESTART ##### - _, inExisting = self._checkRestartForEvaluation() + _, inExisting = self._checkRestartForEvaluation(rlz) #if not found or not restarting, we have a new point! if inExisting is None: # we have a new evaluation, so check its contents for consistency self._checkSample() - self.raiseADebug(f' ... Sample point {self.inputInfo["prefix"]}: {self.values}') - # The new info for the perturbed run will be stored in the sampler's - # inputInfo (I don't particularly like this, I think it should be - # returned here, but let's get this working and then we can decide how - # to best pass this information around. My reasoning is that returning - # it here means the sampler does not need to store it, and we can return - # a copy of the information, otherwise we have to be careful to create a - # deep copy of this information when we submit it to a job). - # -- DPM 4/18/17 - return 0, oldInput + self.raiseADebug(f' ... Sample point {rlz.inputInfo["prefix"]}: {rlz.values}') + for var, val in rlz.items(): + self.raiseADebug(f' ... - "{var}": "{val}"') + return 0, rlz, modelInput #otherwise, return the restart point else: # TODO use realization format as per new data object (no subspaces) + # TODO use Realization object? 
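To illustrate the batching contract set up above: getBatchSize() defaults to 0, in which case the framework loops over the batch and hands localGenerateInput one Realization at a time, while a nonzero size hands the whole batch to a single localGenerateInput call. A schematic, hypothetical subclass might look like the following; the class name and sampled values are made up for illustration:

from ravenframework.Samplers import Sampler

class ToyBatchingSampler(Sampler):
  """ Hypothetical batching sampler, for illustration only. """
  def getBatchSize(self):
    return 8  # nonzero: localGenerateInput receives the whole batch at once

  def localGenerateInput(self, rlzBatch, model, modelInput):
    for i, rlz in enumerate(rlzBatch):
      rlz['x'] = 0.1 * i  # write sampled values directly onto each Realization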
self.raiseADebug('Point found in restart!') rlz = {} # we've fixed it so the input and output space don't really matter, so use restartData's own definition @@ -1140,7 +1126,7 @@ def generateInput(self,model,oldInput): rlz['inputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('input')) rlz['outputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('output')+self.restartData.getVars('indexes')) rlz['metadata'] = copy.deepcopy(self.inputInfo) # TODO need deepcopy only because inputInfo is on self - return 1, rlz + return 1, rlz, None def generateInputBatch(self, myInput, model, batchSize, projector=None): """ @@ -1152,6 +1138,7 @@ def generateInputBatch(self, myInput, model, batchSize, projector=None): @ In, projector, object, optional, used for adaptive sampling to provide the projection of the solution on the success metric @ Out, newInputs, list of list, list of the list of input sets """ + FIXME used? newInputs = [] while self.amIreadyToProvideAnInput() and (self.counter < batchSize): if projector is None: @@ -1162,16 +1149,17 @@ def generateInputBatch(self, myInput, model, batchSize, projector=None): return newInputs @abc.abstractmethod - def localGenerateInput(self, model, oldInput): + def localGenerateInput(self, rlz, model, modelInput): """ This class need to be overwritten since it is here that the magic of the sampler happens. After this method call the self.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, mapping of variables to values @ In, model, model instance, Model instance @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. etc) @ Out, None """ - def pcaTransform(self, varsDict, dist): + def pcaTransform(self, rlz, varsDict, dist): """ This method is used to map latent variables with respect to the model input variables both the latent variables and the model input variables will be stored in the dict: self.inputInfo['SampledVars'] @@ -1179,36 +1167,28 @@ def pcaTransform(self, varsDict, dist): @ In, dist, string, the distribution name associated with given variable set @ Out, None """ - def _applyTransformation(values): - """ - Wrapper to apply the pca transformation - @ In, values, dict, dictionary of sampled vars - @ Out, values, dict, the updated set of values - """ - latentVariablesValues = [] - listIndex = [] - manifestVariablesValues = [None] * len(varsDict['manifestVariables']) - for index,lvar in enumerate(varsDict['latentVariables']): - value = values.get(lvar) - if lvar is not None: - latentVariablesValues.append(value) - listIndex.append(varsDict['latentVariablesIndex'][index]) - - varName = utils.first(utils.first(self.distributions2variablesMapping[dist]).keys()) - varsValues = self.distDict[varName].pcaInverseTransform(latentVariablesValues,listIndex) - for index1,index2 in enumerate(varsDict['manifestVariablesIndex']): - manifestVariablesValues[index2] = varsValues[index1] - manifestVariablesDict = dict(zip(varsDict['manifestVariables'],manifestVariablesValues)) - values.update(manifestVariablesDict) - - return values - - if self.inputInfo.get('batchMode',False): - for b in range(self.inputInfo['batchInfo']['nRuns']): - values = self.inputInfo['batchInfo']['batchRealizations'][b]['SampledVars'] - self.inputInfo['batchInfo']['batchRealizations'][b]['SampledVars'] = _applyTransformation(values) - else: - self.values = _applyTransformation(self.values) + # def _applyTransformation(values): + # """ + # TODO can this just be 
collapsed down to a single call now without a wrapper?? + # Wrapper to apply the pca transformation + # @ In, values, dict, dictionary of sampled vars + # @ Out, None # TODO REMOVE values, dict, the updated set of values + # """ + latentVariablesValues = [] + listIndex = [] + manifestVariablesValues = [None] * len(varsDict['manifestVariables']) + for index,lvar in enumerate(varsDict['latentVariables']): + value = rlz.get(lvar) + if lvar is not None: + latentVariablesValues.append(value) + listIndex.append(varsDict['latentVariablesIndex'][index]) + varName = utils.first(utils.first(self.distributions2variablesMapping[dist]).keys()) + varsValues = self.distDict[varName].pcaInverseTransform(latentVariablesValues,listIndex) + for index1,index2 in enumerate(varsDict['manifestVariablesIndex']): + manifestVariablesValues[index2] = varsValues[index1] + manifestVariablesDict = dict(zip(varsDict['manifestVariables'],manifestVariablesValues)) + rlz.update(manifestVariablesDict) + # TODO REMOVE_applyTransformation(rlz) def _checkSample(self): """ diff --git a/ravenframework/Steps/MultiRun.py b/ravenframework/Steps/MultiRun.py index 321c3ad72d..9ebfad031f 100644 --- a/ravenframework/Steps/MultiRun.py +++ b/ravenframework/Steps/MultiRun.py @@ -329,16 +329,16 @@ def _findANewInputToRun(self, sampler, model, inputs, outputs, jobHandler): # The value of "found" determines what the Sampler is ready to provide. # case 0: a new sample has been discovered and can be run, and newInp is a new input list. # case 1: found the input in restart, and newInp is a realization dictionary of data to use - found, newInp = sampler.generateInput(model,inputs) + found, rlz, modelInp = sampler.generateInput(model, inputs) if found == 1: - kwargs = copy.deepcopy(sampler.inputInfo) + # TODO REMOVE kwargs = rlz.inputInfo # TODO deeper copy needed? shouldn't be ... # "submit" the finished run - jobHandler.addFinishedJob(newInp, metadata=kwargs) + jobHandler.addFinishedJob(rlz, metadata=rlz.inputInfo) return None # NOTE: we return None here only because the Sampler's "counter" is not correctly passed # through if we add several samples at once through the restart. If we actually returned # a Realization object from the Sampler, this would not be a problem. 
- talbpaul - return newInp + return rlz, modelInp def flushStep(self): """ diff --git a/tests/framework/unit_tests/Realizations/TestRealization.py b/tests/framework/unit_tests/Realizations/TestRealization.py index 37eb616fd1..d35dc3784c 100644 --- a/tests/framework/unit_tests/Realizations/TestRealization.py +++ b/tests/framework/unit_tests/Realizations/TestRealization.py @@ -29,179 +29,6 @@ results = {"pass":0,"fail":0} -def checkFloat(comment,value,expected,tol=1e-10,update=True): - """ - This method is aimed to compare two floats given a certain tolerance - @ In, comment, string, a comment printed out if it fails - @ In, value, float, the value to compare - @ In, expected, float, the expected value - @ In, tol, float, optional, the tolerance - @ Out, res, bool, True if same - """ - if np.isnan(value) and np.isnan(expected): - res = True - elif np.isnan(value) or np.isnan(expected): - res = False - else: - res = abs(value - expected) <= tol - if update: - if not res: - print("checking float",comment,'|',value,"!=",expected) - results["fail"] += 1 - else: - results["pass"] += 1 - return res - -def checkTrue(comment,res,update=True): - """ - This method is a pass-through for consistency and updating - @ In, comment, string, a comment printed out if it fails - @ In, res, bool, the tested value - @ Out, res, bool, True if test - """ - if update: - if res: - results["pass"] += 1 - else: - print("checking bool",comment,'|',res,'is not True!') - results["fail"] += 1 - return res - -def checkSame(comment,value,expected,update=True): - """ - This method is aimed to compare two identical things - @ In, comment, string, a comment printed out if it fails - @ In, value, float, the value to compare - @ In, expected, float, the expected value - @ Out, res, bool, True if same - """ - res = value == expected - if update: - if res: - results["pass"] += 1 - else: - print("checking string",comment,'|',value,"!=",expected) - results["fail"] += 1 - return res - -def checkArray(comment,first,second,dtype,tol=1e-10,update=True): - """ - This method is aimed to compare two arrays - @ In, comment, string, a comment printed out if it fails - @ In, value, float, the value to compare - @ In, expected, float, the expected value - @ In, tol, float, optional, the tolerance - @ Out, res, bool, True if same - """ - res = True - if len(first) != len(second): - res = False - print("checking answer",comment,'|','lengths do not match:',len(first),len(second)) - else: - for i in range(len(first)): - if dtype == float: - pres = checkFloat('',first[i],second[i],tol,update=False) - elif dtype.__name__ in ('str','unicode'): - pres = checkSame('',first[i],second[i],update=False) - if not pres: - print('checking array',comment,'|','entry "{}" does not match: {} != {}'.format(i,first[i],second[i])) - res = False - if update: - if res: - results["pass"] += 1 - else: - results["fail"] += 1 - return res - -def checkRlz(comment,first,second,tol=1e-10,update=True): - """ - This method is aimed to compare two realization - @ In, comment, string, a comment printed out if it fails - @ In, value, float, the value to compare - @ In, expected, float, the expected value - @ In, tol, float, optional, the tolerance - @ Out, res, bool, True if same - """ - res = True - if len(first) != len(second): - res = False - print("checking answer",comment,'|','lengths do not match:',len(first),len(second)) - else: - for key,val in first.items(): - if isinstance(val,float): - pres = checkFloat('',val,second[key],tol,update=False) - elif type(val).__name__ in 
('str','unicode','str_','unicode_'): - pres = checkSame('',val,second[key][0],update=False) - elif isinstance(val,xr.DataArray): - if isinstance(val.item(0),(float,int)): - pres = (val - second[key]).sum()<1e-20 #necessary due to roundoff - else: - pres = val.equals(second[key]) - else: - raise TypeError(type(val)) - if not pres: - print('checking dict',comment,'|','entry "{}" does not match: {} != {}'.format(key,first[key],second[key])) - res = False - if update: - if res: - results["pass"] += 1 - else: - results["fail"] += 1 - return res - -def checkNone(comment,entry,update=True): - """ - Tests if the entry identifies as None. - @ In, comment, str, comment to print if failed - @ In, entry, object, object to test - @ In, update, bool, optional, if True then updates results - @ Out, None - """ - res = entry is None - if update: - if res: - results["pass"] += 1 - else: - print("checking answer",comment,'|','"{}" is not None!'.format(entry)) - results["fail"] += 1 - -def checkFails(comment,errstr,function,update=True,args=None,kwargs=None): - """ - Tests if function fails as expected - @ In, comment, str, comment to print if failed - @ In, errstr, str, expected error string - @ In, function, method, method to run - @ In, update, bool, optional, if True then updates results - @ In, args, list, arguments to function - @ In, kwargs, dict, keywords arguments to function - @ Out, res, bool, result (True if passed) - """ - print('Error testing ...') - if args is None: - args = [] - if kwargs is None: - kwargs = {} - try: - function(*args,**kwargs) - res = False - msg = 'Function call did not error!' - except Exception as e: - res = checkSame('',e.args[0],errstr,update=False) - if not res: - msg = 'Unexpected error message. \n Received: "{}"\n Expected: "{}"'.format(e.args[0],errstr) - if update: - if res: - results["pass"] += 1 - print(' ... end Error testing (PASSED)') - else: - print("checking error",comment,'|',msg) - results["fail"] += 1 - print(' ... 
end Error testing (FAILED)') - print('') - return res - - - ####### # # Quacks like a dict @@ -258,7 +85,7 @@ def checkFails(comment,errstr,function,update=True,args=None,kwargs=None): g5 = rlz[5] if not g5 == 'c': - print(f'checking getitem "pi", got "{g5}" expected "c"!') + print(f'checking getitem "5", got "{g5}" expected "c"!') results['fail'] += 1 else: results['pass'] += 1 From ab29cdac4c76334880bd7003a17b51f9934b9965 Mon Sep 17 00:00:00 2001 From: talbpw Date: Mon, 4 Nov 2024 18:30:18 -0700 Subject: [PATCH 04/18] unit tests for Realization, BatchRealization --- .../Realizations/BatchRealization.py | 3 +- .../unit_tests/Realizations/TestBatch.py | 96 ++++++ .../Realizations/TestRealization.py | 302 +++++++----------- 3 files changed, 210 insertions(+), 191 deletions(-) create mode 100644 tests/framework/unit_tests/Realizations/TestBatch.py diff --git a/ravenframework/Realizations/BatchRealization.py b/ravenframework/Realizations/BatchRealization.py index c962b625b6..077cb4a3bc 100644 --- a/ravenframework/Realizations/BatchRealization.py +++ b/ravenframework/Realizations/BatchRealization.py @@ -34,7 +34,7 @@ def __init__(self, batchSize): # } # self.indexMap = {} # information about dimensionality of variables self.batchSize = batchSize # number of realizations that are part of this object - self._realizations = [Realization() for _ in range(min(batchSize, 1))] + self._realizations = [Realization() for _ in range(max(batchSize, 1))] ######## @@ -69,6 +69,7 @@ def __setitem__(self, index, value): @ In, value, any, corresponding value @ Out, None """ + # TODO should we allow providing a Realization object? raise IndexError('Tried to overwrite a Realization object in a Batch!') def pop(self, *args): diff --git a/tests/framework/unit_tests/Realizations/TestBatch.py b/tests/framework/unit_tests/Realizations/TestBatch.py new file mode 100644 index 0000000000..7dc7643445 --- /dev/null +++ b/tests/framework/unit_tests/Realizations/TestBatch.py @@ -0,0 +1,96 @@ +# Copyright 2017 Battelle Energy Alliance, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + This Module performs Unit Tests for the BatchRealization objects. 
+""" + +import os +import sys +import unittest + +# find location of crow, message handler +ravenDir = os.path.abspath(os.path.join(*([os.path.dirname(__file__)]+[os.pardir]*4))) +sys.path.append(ravenDir) + +from ravenframework import Realizations + +class TestBatchRealization(unittest.TestCase): + """ + Unit tests for the Realization object + """ + + def setUp(self): + """ + Named unittest method to set up tests + Runs before each test_* method + @ In, None + @ Out, None + """ + self.batch = Realizations.BatchRealization(2) + self.batch[0]['a'] = 2 + self.batch[0]['b'] = 41 + self.batch[0]['pi'] = 2.14159 + self.batch[0][5] = 'b' + self.batch[1]['a'] = 3 + self.batch[1]['b'] = 42 + self.batch[1]['pi'] = 3.14159 + self.batch[1][5] = 'c' + + def test_getitem(self): + """ tests default indexed getter """ + r0a = self.batch[0]['a'] + r0b = self.batch[0]['b'] + r1a = self.batch[1]['a'] + r1b = self.batch[1]['b'] + self.assertEqual(r0a, 2, 'incorrect index 0 "a" value') + self.assertEqual(r0b, 41, 'incorrect index 0 "b" value') + self.assertEqual(r1a, 3, 'incorrect index 1 "a" value') + self.assertEqual(r1b, 42, 'incorrect index 1 "b" value') + + def test_setitem(self): + """ tests error on setting object """ + with self.assertRaises(IndexError, msg="setting item") as cm: + self.batch[0] = 1.618 + + def test_len(self): + """ tests length builtin """ + self.assertEqual(len(self.batch), 2, 'incorrect "len"') + + def test_iter(self): + """ tests iter builtin """ + for i, rlz in enumerate(self.batch): + self.assertEqual(rlz['a'], 2 + i, f'iter index "{i}" key "a"') + self.assertEqual(rlz['b'], 41 + i, f'iter index "{i}" key "b"') + + def test_pop(self): + """ tests pop method """ + rlz = self.batch[1] + self.assertTrue(rlz in self.batch, 'membership by realization object') + popped = self.batch.pop() + self.assertEqual(popped['b'], 42, 'value check in "pop" rlz') + self.assertFalse(rlz in self.batch, 'membership after pop') + + +if __name__ == '__main__': + unittest.main() + + # + # framework.test_realization + # talbpaul + # 2024-10-23 + # BatchRealization + # + # This test is a Unit Test for the BatchRealization class. + # + # diff --git a/tests/framework/unit_tests/Realizations/TestRealization.py b/tests/framework/unit_tests/Realizations/TestRealization.py index d35dc3784c..a944a737b3 100644 --- a/tests/framework/unit_tests/Realizations/TestRealization.py +++ b/tests/framework/unit_tests/Realizations/TestRealization.py @@ -15,7 +15,9 @@ This Module performs Unit Tests for the Realization objects. 
""" -import os, sys +import os +import sys +import unittest # find location of crow, message handler ravenDir = os.path.abspath(os.path.join(*([os.path.dirname(__file__)]+[os.pardir]*4))) @@ -23,192 +25,112 @@ from ravenframework import Realizations -print('Module undergoing testing:') -print(Realizations.Realization) -print('') - -results = {"pass":0,"fail":0} - -####### -# -# Quacks like a dict -# -rlz = Realizations.Realization() - -# setitem -a = 3 -b = 42 -pi = 3.14159 -rlz['a'] = a -rlz['b'] = b -rlz['pi'] = pi -rlz[5] = 'c' - - -# membership, contains -for member in ['a', 'b', 'pi', 5]: - if not member in rlz: - print(f'checking member "{member}", got False expected True!') - results['fail'] += 1 - else: - results['pass'] += 1 - -for nonmember in ['d', 2, 1.618, 'values']: - if nonmember in rlz: - print(f'checking member "{nonmember}", got True expected False!') - results['fail'] += 1 - else: - results['pass'] += 1 - - -# getitem -gb = rlz['b'] -if not gb == 42: - print(f'checking getitem "b", got "{gb}" expected "{b}"!') - results['fail'] += 1 -else: - results['pass'] += 1 - -ga = rlz['a'] -if not ga == 3: - print(f'checking getitem "a", got "{ga}" expected "{a}"!') - results['fail'] += 1 -else: - results['pass'] += 1 - -gp = rlz['pi'] -if not gp == 3.14159: - print(f'checking getitem "pi", got "{gp}" expected "{pi}"!') - results['fail'] += 1 -else: - results['pass'] += 1 - -g5 = rlz[5] -if not g5 == 'c': - print(f'checking getitem "5", got "{g5}" expected "c"!') - results['fail'] += 1 -else: - results['pass'] += 1 - - -# get -ga = rlz.get('a') -if not ga == 3: - print(f'checking get "a", got "{ga}" expected "{a}"!') - results['fail'] += 1 -else: - results['pass'] += 1 - -gd = rlz.get('d', 15) -if not gd == 15: - print(f'checking get default, got "{gd}" expected "{15}"!') - results['fail'] += 1 -else: - results['pass'] += 1 - - -# len -if not len(rlz) == 4: - print(f'checking len, got "{len(rlz)}" expected "{3}"!') - results['fail'] += 1 -else: - results['pass'] += 1 - - -# delitem -del rlz['b'] -if 'b' in rlz: - print('checking del, failed to remove "b"!') - results['fail'] += 1 -else: - results['pass'] += 1 - - -# iter -expk = ['a', 'pi', 5] -for i, k in enumerate(rlz): - if k != expk[i]: - print(f'checking iter[{i}], got "{k}" expected "{expk[i]}"!') - results['fail'] += 1 - else: - results['pass'] += 1 - - -# keys -for i, k in enumerate(rlz.keys()): - if k != expk[i]: - print(f'checking keys[{i}], got "{k}" expected "{expk[i]}"!') - results['fail'] += 1 - else: - results['pass'] += 1 - - -# values -expv = [3, 3.14159, 'c'] -for i, v in enumerate(rlz.values()): - if v != expv[i]: - print(f'checking values[{i}], got "{v}" expected "{expv[i]}"!') - results['fail'] += 1 - else: - results['pass'] += 1 - - -# items -for i, (k, v) in enumerate(rlz.items()): - if (k != expk[i]) or (v != expv[i]): - print(f'checking items[{i}], got "({k}, {v})" expected ("{expk[i]}, {expv[i]}")!') - results['fail'] += 1 - else: - results['pass'] += 1 - - -# update -new = {'a': 30, # update old entry - 'b': 420, # add back old entry in new position - 5: 'c2', # update old entry - 'new': 372} # new entry -rlz.update(new) -expk = ['a', 'pi', 5, 'b', 'new'] -expv = [ 30, 3.14159, 'c2', 420, 372] -for i, (k, v) in enumerate(rlz.items()): - if (k != expk[i]) or (v != expv[i]): - print(f'checking update[{i}], got "({k}, {v})" expected ("{expk[i]}, {expv[i]}")!') - results['fail'] += 1 - else: - results['pass'] += 1 - -# pop -val = rlz.pop(5) -if val != 'c2': - print(f'checking pop[5], got "{val}" expected 
"c")!') - results['fail'] += 1 -else: - results['pass'] += 1 -if 5 in rlz: - print('checking pop[5], failed to remove 5!') - results['fail'] += 1 -else: - results['pass'] += 1 - - - -####### -# -# Results -# - -print(results) - - -sys.exit(results["fail"]) -""" - - framework.test_realization - talbpaul - 2024-10-23 - Realization - - This test is a Unit Test for the Realization class. - - -""" +class TestRealization(unittest.TestCase): + """ + Unit tests for Realization object + """ + def setUp(self): + """ + Named unittest method to set up tests + Also indirectly tests setting variable values in realization + Runs before each test_* method + @ In, None + @ Out, None + """ + self.rlz = Realizations.Realization() + # var-value pairs to use in testing + self.a = 3 + self.b = 42 + self.pi = 3.14159 + self.rlz['a'] = self.a # str var, integer vals + self.rlz['b'] = self.b # + self.rlz['pi'] = self.pi # float val + self.rlz[5] = 'c' # integer var, string val + + + def test_membership(self): + """ tests checking for membership """ + for member in ['a', 'b', 'pi', 5]: + self.assertTrue(member in self.rlz, f'member "{member}" not found in realization') + for nonmember in ['d', 2, 1.618, '_values']: + self.assertTrue(nonmember not in self.rlz, f'nonmember "{nonmember}" found in realization') + + def test_getitem(self): + """ tests accessing variables and values """ + self.assertEqual(self.rlz['b'], 42, 'incorrect stored value of "b"') + self.assertEqual(self.rlz['a'], 3, 'incorrect stored value of "a"') + self.assertEqual(self.rlz['pi'], 3.14159, 'incorrect stored value of "pi"') + self.assertEqual(self.rlz[5], 'c', 'incorrect stored value of "5"') + + def test_get(self): + """ tests accessing variables and default values """ + self.assertEqual(self.rlz.get('a'), 3, 'incorrect "get" for "a"') + self.assertEqual(self.rlz.get('d', 15), 15, 'incorrect "get" default value') + + def test_len(self): + """ tests measuring vector length """ + self.assertEqual(len(self.rlz), 4, 'incorrect length') + + def test_del(self): + """ tests removing an entry """ + del self.rlz['b'] + self.assertTrue('b' not in self.rlz, '"b" still in rlz despite removal') + + def test_iter(self): + """ tests iterating over entries """ + expected = ['a', 'b', 'pi', 5] + for i, k in enumerate(self.rlz): + self.assertEqual(expected[i], k, f'unexpected iter key "{i}"') + + def test_keys(self): + """ tests iterating over keys """ + expected = ['a', 'b', 'pi', 5] + for i, k in enumerate(self.rlz.keys()): + self.assertEqual(expected[i], k, f'unexpected "keys" key "{i}"') + + def test_values(self): + """ tests iterating over values """ + expected = [self.a, self.b, self.pi, 'c'] + for i, k in enumerate(self.rlz.values()): + self.assertEqual(expected[i], k, f'unexpected "values" value "{i}"') + + def test_items(self): + """ tests iterating over key-value pairs """ + expectKeys = ['a', 'b', 'pi', 5] + expectValues = [self.a, self.b, self.pi, 'c'] + for i, (k, v) in enumerate(self.rlz.items()): + self.assertEqual(expectKeys[i], k, f'unexpected "items" key "{i}"') + self.assertEqual(expectValues[i], v, f'unexpected "items" value "{i}"') + + def test_update(self): + """ tests update method """ + new = {'a': 30, # update old entry + 'b': 420, # add back old entry in new position + 5: 'c2', # update old entry + 'new': 372} # new entry + self.rlz.update(new) + expectKeys = ['a', 'b', 'pi', 5, 'new'] + expectValues = [30, 420, 3.14159, 'c2', 372] + for i, (k, v) in enumerate(self.rlz.items()): + self.assertEqual(expectKeys[i], k, f'unexpected 
"update" key "{i}"') + self.assertEqual(expectValues[i], v, f'unexpected "update" value "{i}"') + + def test_pop(self): + """ tests pop method """ + val = self.rlz.pop(5) + self.assertEqual(val, 'c', 'incorrect "pop" value') + self.assertFalse(5 in self.rlz, 'member present after "pop"') + + +if __name__ == '__main__': # Not run when unittest called from command line or Unittest tester is used + unittest.main() + + # + # framework.test_realization + # talbpaul + # 2024-10-23 + # Realization + # + # This test is a Unit Test for the Realization class. + # + # From a1ce01472d02ba846805af0a1d397aa0ecc5290a Mon Sep 17 00:00:00 2001 From: talbpw Date: Tue, 5 Nov 2024 11:23:20 -0700 Subject: [PATCH 05/18] rename BatchRealization to RealizationBatch --- ravenframework/Realizations/Realization.py | 9 +-- ...atchRealization.py => RealizationBatch.py} | 2 +- ravenframework/Realizations/__init__.py | 2 +- ravenframework/Samplers/Sampler.py | 56 +++++++++++-------- ravenframework/Steps/MultiRun.py | 21 +++---- .../unit_tests/Realizations/TestBatch.py | 2 +- 6 files changed, 47 insertions(+), 45 deletions(-) rename ravenframework/Realizations/{BatchRealization.py => RealizationBatch.py} (99%) diff --git a/ravenframework/Realizations/Realization.py b/ravenframework/Realizations/Realization.py index 3ccf0031d2..3692327de3 100644 --- a/ravenframework/Realizations/Realization.py +++ b/ravenframework/Realizations/Realization.py @@ -27,14 +27,15 @@ def __init__(self): @ In, None @ Out, None """ - self._values = {} # mapping of variables to their values + self._values = {} # mapping of variables to their values + self.indexMap = {} # information about dimensionality of variables + self.labels = {} # custom labels for tracking, set externally + self.batchSize = 0 # not a batch, easy way to check + self.isRestart = False # True if model was not run, but data was taken from restart self.inputInfo = {'SampledVars': {}, # additional information about this realization 'SampledVarsPb': {}, 'crowDist': {} } - self.indexMap = {} # information about dimensionality of variables - self.labels = {} # custom labels for tracking, set externally - self.batchSize = 0 # not a batch, easy way to check ######## # diff --git a/ravenframework/Realizations/BatchRealization.py b/ravenframework/Realizations/RealizationBatch.py similarity index 99% rename from ravenframework/Realizations/BatchRealization.py rename to ravenframework/Realizations/RealizationBatch.py index 077cb4a3bc..fdc98044d8 100644 --- a/ravenframework/Realizations/BatchRealization.py +++ b/ravenframework/Realizations/RealizationBatch.py @@ -16,7 +16,7 @@ """ from . 
import Realization -class BatchRealization: +class RealizationBatch: """ A container for groups of Realization objects, that should mostly invisibly work like a realization """ diff --git a/ravenframework/Realizations/__init__.py b/ravenframework/Realizations/__init__.py index d08113ed66..eb17b54bf6 100644 --- a/ravenframework/Realizations/__init__.py +++ b/ravenframework/Realizations/__init__.py @@ -18,4 +18,4 @@ # These lines ensure that we do not have to do something like: # 'from Samplers.Sampler import Sampler' outside of this submodule from .Realization import Realization -from .BatchRealization import BatchRealization +from .RealizationBatch import RealizationBatch diff --git a/ravenframework/Samplers/Sampler.py b/ravenframework/Samplers/Sampler.py index da22fa4cb3..26fe5aff6f 100644 --- a/ravenframework/Samplers/Sampler.py +++ b/ravenframework/Samplers/Sampler.py @@ -29,7 +29,7 @@ from ..utils import utils,randomUtils,InputData, InputTypes from ..utils.graphStructure import evaluateModelsOrder from ..BaseClasses import BaseEntity, Assembler -from ..Realizations import BatchRealization +from ..Realizations import RealizationBatch _vectorPostfixFormat = '__RVEC__{ID}' @@ -1079,7 +1079,7 @@ def generateInput(self, model, modelInput): ##### GENERATE SAMPLE ##### # instantiate a batch of data carrier realizations batchSize = self.getBatchSize() - rlzBatch = BatchRealization(batchSize) + rlzBatch = RealizationBatch(batchSize) if batchSize == 0: # this means the current sampler does not know how to handle batching, so do it one at a time for rlz in rlzBatch: @@ -1100,33 +1100,41 @@ def generateInput(self, model, modelInput): # ND variables self._formNDVariables(rlzBatch) # merge sampler metadata - rlz.inputInfo.update(self.samplerInfo) + for rlz in rlzBatch: + rlz.inputInfo.update(self.samplerInfo) # reset distribution memory for key in self.distDict: if self.distDict[key].getMemory(): self.distDict[key].reset() ##### CHECK RESTART ##### - _, inExisting = self._checkRestartForEvaluation(rlz) - #if not found or not restarting, we have a new point! - if inExisting is None: - # we have a new evaluation, so check its contents for consistency - self._checkSample() - self.raiseADebug(f' ... Sample point {rlz.inputInfo["prefix"]}: {rlz.values}') - for var, val in rlz.items(): - self.raiseADebug(f' ... - "{var}": "{val}"') - return 0, rlz, modelInput - #otherwise, return the restart point - else: - # TODO use realization format as per new data object (no subspaces) - # TODO use Realization object? - self.raiseADebug('Point found in restart!') - rlz = {} - # we've fixed it so the input and output space don't really matter, so use restartData's own definition - # DO format the data as atleast_1d so it's consistent in the ExternalModel for users (right?) - rlz['inputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('input')) - rlz['outputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('output')+self.restartData.getVars('indexes')) - rlz['metadata'] = copy.deepcopy(self.inputInfo) # TODO need deepcopy only because inputInfo is on self - return 1, rlz, None + # check each rlz for restart, and if so, fill its values and submit it as complete + for r, rlz in enumerate(rlzBatch): + _, inExisting = self._checkRestartForEvaluation(rlz) + if inExisting is None: + # we have a new evaluation, so check its contents for consistency + self._checkSample() + self.raiseADebug(f' ... 
Batch Sample point {r}, prefix {rlz.inputInfo["prefix"]}, (var, val):') + for var, val in rlz.items(): + self.raiseADebug(f' ... - "{var}": "{val}"') + else: + self.raiseADebug(f'Batch Point {r} found in restart!') + # we've fixed it so the input and output space don't really matter, so use restartData's own definition + # DO format the data as atleast_1d so it's consistent in the ExternalModel for users (right?) + # TODO OLD: + # rlz['inputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('input')) + # rlz['outputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('output')+self.restartData.getVars('indexes')) + # rlz['metadata'] = rlz.inputInfo # NOTE soft link + # TODO new: + # TODO can we combine these? + # inputs = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('input')) + # outputs = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('output')+self.restartData.getVars('indexes')) + # TODO method for getting Realization object out of DataObjects? + varvals = dict((var, np.atleast_1d(inExisting[var])) for var in self.restartData.getVars()) + rlz.update(varvals) + rlz.isRestart = True + # END if restart + # END loop over rlz for restart checking + return rlzBatch, None def generateInputBatch(self, myInput, model, batchSize, projector=None): """ diff --git a/ravenframework/Steps/MultiRun.py b/ravenframework/Steps/MultiRun.py index 9ebfad031f..b461e46d9a 100644 --- a/ravenframework/Steps/MultiRun.py +++ b/ravenframework/Steps/MultiRun.py @@ -324,21 +324,14 @@ def _findANewInputToRun(self, sampler, model, inputs, outputs, jobHandler): inherit from some base "Data" so that we can ensure a consistent interface for these?) @ In, jobHandler, object, the raven object used to handle jobs - @ Out, newInp, list, list containing the new inputs (or None if a restart) + @ Out, newInp, RealizationBatch, list containing the new inputs (or None if a restart) """ - # The value of "found" determines what the Sampler is ready to provide. - # case 0: a new sample has been discovered and can be run, and newInp is a new input list. - # case 1: found the input in restart, and newInp is a realization dictionary of data to use - found, rlz, modelInp = sampler.generateInput(model, inputs) - if found == 1: - # TODO REMOVE kwargs = rlz.inputInfo # TODO deeper copy needed? shouldn't be ... - # "submit" the finished run - jobHandler.addFinishedJob(rlz, metadata=rlz.inputInfo) - return None - # NOTE: we return None here only because the Sampler's "counter" is not correctly passed - # through if we add several samples at once through the restart. If we actually returned - # a Realization object from the Sampler, this would not be a problem. 
- talbpaul - return rlz, modelInp + batch, modelInp = sampler.generateInput(model, inputs) + for rlz in batch: + if rlz.isRestart: + # "submit" the finished run + jobHandler.addFinishedJob(rlz, metadata=rlz.inputInfo) + return batch, modelInp def flushStep(self): """ diff --git a/tests/framework/unit_tests/Realizations/TestBatch.py b/tests/framework/unit_tests/Realizations/TestBatch.py index 7dc7643445..435c04c86c 100644 --- a/tests/framework/unit_tests/Realizations/TestBatch.py +++ b/tests/framework/unit_tests/Realizations/TestBatch.py @@ -37,7 +37,7 @@ def setUp(self): @ In, None @ Out, None """ - self.batch = Realizations.BatchRealization(2) + self.batch = Realizations.RealizationBatch(2) self.batch[0]['a'] = 2 self.batch[0]['b'] = 41 self.batch[0]['pi'] = 2.14159 From aa09b4596b505eae2b7a0cd06759a8a27cd411dc Mon Sep 17 00:00:00 2001 From: talbpw Date: Tue, 5 Nov 2024 15:49:42 -0700 Subject: [PATCH 06/18] test_restart_Grid is getting through fresh samples but not restart yet --- ravenframework/JobHandler.py | 29 +++- ravenframework/Models/Dummy.py | 31 ++-- ravenframework/Models/ExternalModel.py | 63 ++++---- ravenframework/Models/Model.py | 56 ++++---- ravenframework/Realizations/Realization.py | 13 +- .../Realizations/RealizationBatch.py | 8 +- ravenframework/Runners/Factory.py | 1 - ravenframework/Runners/InternalRunner.py | 10 +- ravenframework/Runners/Runner.py | 9 +- ravenframework/Samplers/Grid.py | 56 ++++---- ravenframework/Samplers/Sampler.py | 134 +++++++++--------- ravenframework/Steps/MultiRun.py | 78 ++++------ 12 files changed, 246 insertions(+), 242 deletions(-) diff --git a/ravenframework/JobHandler.py b/ravenframework/JobHandler.py index e8ed4ce8cc..ba94858e09 100644 --- a/ravenframework/JobHandler.py +++ b/ravenframework/JobHandler.py @@ -686,7 +686,34 @@ def startLoop(self): # probably when we move to Python 3. time.sleep(self.sleepTime) - def addJob(self, args, functionToRun, identifier, metadata=None, forceUseThreads = False, uniqueHandler="any", clientQueue = False, groupInfo = None): + def addJobBatch(self, batch, model, modelInput, samplerType, evalFunc): + """ + Adds a batch of jobs to the internal queue. + @ In, batch, RealizationBatch, set of realizations to add + @ In, model, Model, model instance to run + @ In, modelInput, list, inputs for the Model + @ In, samplerType, str, sampler that generated this request + @ In, evalFunc, callable, method to be executed + @ Out, None + """ + # TODO register batch to fill later? 
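A rough caller-side sketch of how this method is meant to be driven, mirroring the Model.submit and MultiRun changes elsewhere in this patch; the helper name and the use of sampler.type as the samplerType string are assumptions for illustration, and restart realizations are turned into finished jobs by the loop below rather than being resubmitted:

def submitBatch(sampler, model, modelInput, jobHandler):
  # hypothetical helper, for illustration only
  batch, _ = sampler.generateInput(model, modelInput)
  jobHandler.addJobBatch(batch, model, modelInput, sampler.type,
                         model.__class__.evaluateSample)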
+ for rlz in batch: + if rlz.isRestart: + self.addFinishedJob(rlz, metadata=rlz.inputInfo) + else: + self.addJob( + (model, modelInput, samplerType, rlz), + evalFunc, + rlz.inputInfo['prefix'], + metadata = rlz.inputInfo, + uniqueHandler=rlz.inputInfo.get('uniqueHandler', 'any'), + forceUseThreads=rlz.inputInfo.get('forceThreads', False), + groupInfo={'id': batch.ID, 'size': len(batch)} + ) + + def addJob(self, args, functionToRun, identifier, metadata=None, + forceUseThreads=False, uniqueHandler="any", clientQueue=False, + groupInfo=None): """ Method to add an internal run (function execution) @ In, args, dict, this is a list of arguments that will be passed as diff --git a/ravenframework/Models/Dummy.py b/ravenframework/Models/Dummy.py index f6f4625231..8795fa3d9a 100644 --- a/ravenframework/Models/Dummy.py +++ b/ravenframework/Models/Dummy.py @@ -135,7 +135,7 @@ def _inputToInternal(self,dataIN): localInput = dataIN #here we do not make a copy since we assume that the dictionary is for just for the model usage and any changes are not impacting outside return localInput - def createNewInput(self,myInput,samplerType,**kwargs): + def createNewInput(self, myInput, samplerType, rlz): """ This function will return a new input to be submitted to the model, it is called by the sampler. here only a PointSet is accepted a local copy of the values is performed. @@ -143,17 +143,21 @@ def createNewInput(self,myInput,samplerType,**kwargs): The copied values are returned as a dictionary back @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} + @ In, rlz, Realization, Realization from whiech to build input @ Out, ([(inputDict)],copy.deepcopy(kwargs)), tuple, return the new input in a tuple form """ - inputDict = self._inputToInternal(myInput[0]) - self._replaceVariablesNamesWithAliasSystem(inputDict,'input',False) + inputDict = self._inputToInternal(myInput[0]) + self._replaceVariablesNamesWithAliasSystem(inputDict, 'input', False) - if 'SampledVars' in kwargs.keys(): - sampledVars = self._replaceVariablesNamesWithAliasSystem(kwargs['SampledVars'],'input',False) - for key in kwargs['SampledVars'].keys(): - inputDict[key] = np.atleast_1d(kwargs['SampledVars'][key]) + if len(rlz): + self._replaceVariablesNamesWithAliasSystem(rlz, 'input', False) + for var, val in rlz.items(): + inputDict[var] = np.atleast_1d(val) + ### OLD ### + # if 'SampledVars' in kwargs.keys(): + # sampledVars = self._replaceVariablesNamesWithAliasSystem(kwargs['SampledVars'],'input',False) + # for key in kwargs['SampledVars'].keys(): + # inputDict[key] = np.atleast_1d(kwargs['SampledVars'][key]) missing = list(var for var,val in inputDict.items() if val is None) if len(missing) != 0: @@ -169,23 +173,22 @@ def createNewInput(self,myInput,samplerType,**kwargs): kwargs['SampledVars'] = sampledVars except KeyError: pass - return [(inputDict)],copy.deepcopy(kwargs) + return [(inputDict)],copy.deepcopy(rlz) @Parallel() - def evaluateSample(self, myInput, samplerType, kwargs): + def evaluateSample(self, myInput, samplerType, rlz): """ This will evaluate an individual sample on this model. Note, parameters are needed by createNewInput and thus descriptions are copied from there. 
       @ In, myInput, list, the inputs (list) to start from to generate the new one
       @ In, samplerType, string, is the type of sampler that is calling to generate a new input
-      @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler,
-           a mandatory key is the sampledVars'that contains a dictionary {'name variable':value}
+      @ In, rlz, Realization, Realization from which to build input
       @ Out, returnValue, tuple, This will hold two pieces of information, the
         first item will be the input data used to generate this sample, the second
         item will be the output of this model given the specified inputs
     """
-    Input = self.createNewInput(myInput, samplerType, **kwargs)
+    Input = self.createNewInput(myInput, samplerType, rlz)
     inRun = self._manipulateInput(Input[0])
     # alias system
     self._replaceVariablesNamesWithAliasSystem(inRun,'input',True)
diff --git a/ravenframework/Models/ExternalModel.py b/ravenframework/Models/ExternalModel.py
index 569aefb6e9..2ec421c89e 100644
--- a/ravenframework/Models/ExternalModel.py
+++ b/ravenframework/Models/ExternalModel.py
@@ -119,38 +119,35 @@ def initialize(self,runInfo,inputs,initDict=None):
       self.sim.initialize(self.initExtSelf,runInfo,inputs)
     Dummy.initialize(self, runInfo, inputs)

-  def createNewInput(self,myInput,samplerType,**kwargs):
+  def createNewInput(self,myInput, samplerType, rlz):
     """
       This function will return a new input to be submitted to the model, it is called by the sampler.
       @ In, myInput, list, the inputs (list) to start from to generate the new one
       @ In, samplerType, string, is the type of sampler that is calling to generate a new input
-      @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler,
-           a mandatory key is the sampledVars'that contains a dictionary {'name variable':value}
+      @ In, rlz, Realization, sample point
       @ Out, ([(inputDict)],copy.deepcopy(kwargs)), tuple, return the new input in a tuple form
     """
     modelVariableValues = {}
     if 'createNewInput' in dir(self.sim):
-      if 'SampledVars' in kwargs.keys():
-        sampledVars = self._replaceVariablesNamesWithAliasSystem(kwargs['SampledVars'],'input',False)
-      extCreateNewInput = self.sim.createNewInput(self.initExtSelf,myInput,samplerType,**kwargs)
-      if extCreateNewInput is None:
-        self.raiseAnError(AttributeError,'in external Model '+self.ModuleToLoad+' the method createNewInput must return something. Got: None')
-      if type(extCreateNewInput).__name__ != "dict":
-        self.raiseAnError(AttributeError,'in external Model '+self.ModuleToLoad+ ' the method createNewInput must return a dictionary. Got type: ' +type(extCreateNewInput).__name__)
-      if 'SampledVars' in kwargs.keys() and len(self.alias['input'].keys()) != 0:
-        kwargs['SampledVars'] = sampledVars
+      if len(rlz): #'SampledVars' in kwargs.keys():
+        sampledVars = self._replaceVariablesNamesWithAliasSystem(rlz, 'input', False)
+      extCreateNewInput = self.sim.createNewInput(self.initExtSelf, myInput, samplerType, rlz)
+      if not isinstance(extCreateNewInput, dict):
+        self.raiseAnError(AttributeError, f'in external Model {self.ModuleToLoad} the method createNewInput ' +
+                          f'must return a dictionary. Got "{type(extCreateNewInput)}".')
+      if len(rlz) and len(self.alias['input']):
+        rlz.update(sampledVars)
       # add sampled vars
-      if 'SampledVars' in kwargs:
-        for key in kwargs['SampledVars']:
-          if key not in extCreateNewInput:
-            extCreateNewInput[key] = kwargs['SampledVars'][key]
-
-      newInput = ([(extCreateNewInput)],copy.deepcopy(kwargs))
+      if len(rlz):
+        for var, val in rlz.items():
+          if var not in extCreateNewInput:
+            extCreateNewInput[var] = val
+      newInput = ([(extCreateNewInput)], copy.deepcopy(rlz))
     else:
-      newInput = Dummy.createNewInput(self, myInput,samplerType,**kwargs)
-      if 'SampledVars' in kwargs:
-        modelVariableValues.update(kwargs['SampledVars'])
+      newInput = Dummy.createNewInput(self, myInput, samplerType, rlz)
+      if len(rlz):
+        modelVariableValues.update(rlz)
     return newInput, copy.copy(modelVariableValues)

   def localInputAndChecks(self,xmlNode):
@@ -306,37 +303,37 @@ def _externalRun(self, Input):
     return outcomes, self

   @Parallel()
-  def evaluateSample(self, myInput, samplerType, kwargs):
+  def evaluateSample(self, myInput, samplerType, rlz):
     """
         This will evaluate an individual sample on this model. Note, parameters
         are needed by createNewInput and thus descriptions are copied from there.
         @ In, myInput, list, the inputs (list) to start from to generate the new one
         @ In, samplerType, string, is the type of sampler that is calling to generate a new input
-        @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler,
-           a mandatory key is the 'SampledVars' that contains a dictionary {'name variable':value}
+        @ In, rlz, Realization, realization from sampling
        @ Out, returnValue, tuple, This will hold two pieces of information, the
          first item will be the input data used to generate this sample, the second
          item will be the output of this model given the specified inputs
     """
-    Input = self.createNewInput(myInput, samplerType, **kwargs)
+    Input = self.createNewInput(myInput, samplerType, rlz)
     inRun = copy.copy(self._manipulateInput(Input[0][0]))
     # collect results from model run
-    result,instSelf = self._externalRun(inRun,)
+    result, instSelf = self._externalRun(inRun,)
     evalIndexMap = result.get('_indexMap', [{}])[0]
     # build realization
     ## do it in this order to make sure only the right variables are overwritten
     ## first inRun, which has everything from self.* and Input[*]
-    rlz = dict((var, np.atleast_1d(val)) for var, val in inRun.items())
+    # FIXME should this be a proper Realization object? Should we update the one we already have?
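The realization assembly just below layers several plain dictionaries, with later updates winning: model results overwrite the run inputs, per-sample metadata comes next, and the sampled values carried by the Realization are applied last. A self-contained sketch of that ordering (variable names and values are examples only):

import numpy as np

inRun    = {'x': 1.0, 'y': 2.0}    # inputs handed to the external model
result   = {'y': 2.5, 'ans': 7.0}  # model outputs (and possibly modified inputs)
metadata = {'prefix': '3'}         # per-sample metadata (rlz.inputInfo)
sampled  = {'x': 1.0}              # sampled values carried by the Realization

res = {k: np.atleast_1d(v) for k, v in inRun.items()}
res.update({k: np.atleast_1d(v) for k, v in result.items()})    # outputs overwrite inputs
res.update({k: np.atleast_1d(v) for k, v in metadata.items()})  # then metadata
res.update({k: np.atleast_1d(v) for k, v in sampled.items()})   # sampled values win last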
+ res = dict((var, np.atleast_1d(val)) for var, val in inRun.items()) ## then result, which has the expected outputs and possibly changed inputs - rlz.update(dict((var, np.atleast_1d(val)) for var, val in result.items())) + res.update(dict((var, np.atleast_1d(val)) for var, val in result.items())) ## then get the metadata from kwargs - rlz.update(dict((var, np.atleast_1d(val)) for var, val in kwargs.items())) + res.update(dict((var, np.atleast_1d(val)) for var, val in rlz.inputInfo.items())) ## then get the inputs from SampledVars (overwriting any other entries) - rlz.update(dict((var, np.atleast_1d(val)) for var, val in kwargs['SampledVars'].items())) - if '_indexMap' in rlz: - rlz['_indexMap'][0].update(evalIndexMap) - return rlz + res.update(dict((var, np.atleast_1d(val)) for var, val in rlz.items())) + if '_indexMap' in res: + res['_indexMap'].update(evalIndexMap) + return res def collectOutput(self,finishedJob,output,options=None): """ diff --git a/ravenframework/Models/Model.py b/ravenframework/Models/Model.py index a987da62be..4e92f9851c 100644 --- a/ravenframework/Models/Model.py +++ b/ravenframework/Models/Model.py @@ -21,7 +21,6 @@ import copy import numpy as np import abc -import sys import importlib import pickle #External Modules End-------------------------------------------------------------------------------- @@ -421,46 +420,39 @@ def createNewInput(self,myInput,samplerType,**kwargs): """ return [(copy.copy(kwargs))] - def submit(self, myInput, samplerType, jobHandler, **kwargs): + def submit(self, batch, myInput, samplerType, jobHandler): """ This will submit an individual sample to be evaluated by this model to a specified jobHandler. Note, some parameters are needed by createNewInput and thus descriptions are copied from there. + @ In, batch, RealizationBatch, list of realizations to submit as jobs @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input @ In, jobHandler, JobHandler instance, the global job handler instance - @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} @ Out, None """ - nRuns = 1 - batchMode = kwargs.get("batchMode", False) - if batchMode: - nRuns = kwargs["batchInfo"]['nRuns'] - - for index in range(nRuns): - if batchMode: - kw = kwargs['batchInfo']['batchRealizations'][index] - else: - kw = kwargs - - prefix = kw.get("prefix") - uniqueHandler = kw.get("uniqueHandler",'any') - forceThreads = kw.get("forceThreads",False) - - ## These kw are updated by createNewInput, so the job either should not - ## have access to the metadata, or it needs to be updated from within the - ## evaluateSample function, which currently is not possible since that - ## function does not know about the job instance. 
- metadata = kw - - ## This may look a little weird, but due to how the parallel python library - ## works, we are unable to pass a member function as a job because the - ## pp library loses track of what self is, so instead we call it from the - ## class and pass self in as the first parameter - jobHandler.addJob((self, myInput, samplerType, kw), self.__class__.evaluateSample, prefix, metadata=metadata, - uniqueHandler=uniqueHandler, forceUseThreads=forceThreads, - groupInfo={'id': kwargs['batchInfo']['batchId'], 'size': nRuns} if batchMode else None) + jobHandler.addJobBatch(batch, self, myInput, samplerType, self.__class__.evaluateSample) + ### OLD ### + # for rlz in batch: + # if rlz.isRestart: + # jobHandler.addFinishedJob(rlz, metadata=rlz.inputInfo) + # else: + # prefix = rlz.get('prefix') + # uniqueHandler = rlz.inputInfo.get('uniqueHandler', 'any') + # forceThreads = rlz.inputInfo.get('forceThreads', False) + # groupInfo = { + # 'id': rlz.inputInfo['batchId'], + # 'size': len(batch), + # } + # jobHandler.addJob( + # (self, myInput, samplerType, rlz.inputInfo), + # self.__class__.evaluateSample, + # prefix, + # metadata = rlz.inputInfo, + # uniqueHandler=uniqueHandler, + # forceUseThreads=forceThreads, + # groupInfo=groupInfo + # ) def addOutputFromExportDictionary(self,exportDict,output,options,jobIdentifier): """ diff --git a/ravenframework/Realizations/Realization.py b/ravenframework/Realizations/Realization.py index 3692327de3..4bbf60adf3 100644 --- a/ravenframework/Realizations/Realization.py +++ b/ravenframework/Realizations/Realization.py @@ -32,15 +32,22 @@ def __init__(self): self.labels = {} # custom labels for tracking, set externally self.batchSize = 0 # not a batch, easy way to check self.isRestart = False # True if model was not run, but data was taken from restart - self.inputInfo = {'SampledVars': {}, # additional information about this realization - 'SampledVarsPb': {}, - 'crowDist': {} + self.inputInfo = {'SampledVars': {}, # additional information about this realization + 'SampledVarsPb': {}, # point probability information for this realization } ######## # # other useful methods # + def setRestart(self, varVals): + """ + Sets this Realization to have values coming from a restart point. + @ In, varVals, dict, new var-value mapping + @ Out, None + """ + self.update(varVals) + self.isRestart = True ######## # diff --git a/ravenframework/Realizations/RealizationBatch.py b/ravenframework/Realizations/RealizationBatch.py index fdc98044d8..20e6fb9043 100644 --- a/ravenframework/Realizations/RealizationBatch.py +++ b/ravenframework/Realizations/RealizationBatch.py @@ -26,15 +26,9 @@ def __init__(self, batchSize): @ In, None @ Out, None """ - # TODO are any of these shared across realizations? - # self._values = {} # mapping of variables to their values - # self.inputInfo = {'SampledVars': {}, # additional information about this realization - # 'SampledVarsPb': {}, - # 'crowDist': {} - # } - # self.indexMap = {} # information about dimensionality of variables self.batchSize = batchSize # number of realizations that are part of this object self._realizations = [Realization() for _ in range(max(batchSize, 1))] + self.ID = None ######## diff --git a/ravenframework/Runners/Factory.py b/ravenframework/Runners/Factory.py index 5f9df08024..73ddafe985 100644 --- a/ravenframework/Runners/Factory.py +++ b/ravenframework/Runners/Factory.py @@ -32,7 +32,6 @@ def returnInstance(self, Type, funcArgs, func, **kwargs): """ Returns an instance pointer from this module. 
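Model.submit now defers to jobHandler.addJobBatch; the commented-out loop above records the per-realization behavior that helper is expected to cover. A hedged sketch of such a helper, inferred from that loop (the body here is an assumption, not the actual JobHandler implementation; the per-job tuple carries the Realization itself so the new evaluateSample signature is satisfied):

def addJobBatchSketch(jobHandler, batch, model, myInput, samplerType, evalFunc):
  """Sketch: submit one job per Realization, short-circuiting restart points."""
  for rlz in batch:
    if rlz.isRestart:
      # data came from restart, so register the run as already finished
      jobHandler.addFinishedJob(rlz, metadata=rlz.inputInfo)
    else:
      jobHandler.addJob((model, myInput, samplerType, rlz),  # unpacked into evalFunc
                        evalFunc,
                        rlz.get('prefix'),
                        metadata=rlz.inputInfo,
                        uniqueHandler=rlz.inputInfo.get('uniqueHandler', 'any'),
                        forceUseThreads=rlz.inputInfo.get('forceThreads', False),
                        groupInfo={'id': rlz.inputInfo['batchId'], 'size': len(batch)})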
@ In, Type, string, requested object - @ In, caller, object, requesting object @ In, funcArgs, list, arguments to be passed as func(*funcArgs) @ In, func, method or function, function that needs to be run @ In, kwargs, dict, additional keyword arguments to constructor diff --git a/ravenframework/Runners/InternalRunner.py b/ravenframework/Runners/InternalRunner.py index e0e51072a6..f2f88c5ce6 100644 --- a/ravenframework/Runners/InternalRunner.py +++ b/ravenframework/Runners/InternalRunner.py @@ -25,10 +25,10 @@ class InternalRunner(Runner): """ Generic base Class for running internal objects """ - def __init__(self, args, functionToRun, **kwargs): + def __init__(self, functionArgs, functionToRun, **kwargs): """ Init method - @ In, args, dict, this is a list of arguments that will be passed as + @ In, functionArgs, tuple, this is a list of arguments that will be passed as function parameters into whatever method is stored in functionToRun. e.g., functionToRun(*args) @ In, functionToRun, method or function, function that needs to be run @@ -40,7 +40,11 @@ def __init__(self, args, functionToRun, **kwargs): super().__init__(**kwargs) ## Other parameters passed at initialization - self.args = copy.copy(args) + print('DEBUGG internalRunner functionArgs:') + print('DEBUGG ...', type(functionArgs)) + for x in functionArgs: + print('DEBUGG ... ...', x) + self.args = copy.copy(functionArgs) self.functionToRun = functionToRun ## Other parameters manipulated internally diff --git a/ravenframework/Runners/Runner.py b/ravenframework/Runners/Runner.py index 45abe28670..16f8f031e4 100644 --- a/ravenframework/Runners/Runner.py +++ b/ravenframework/Runners/Runner.py @@ -14,21 +14,14 @@ """ Created on September 12, 2016 """ -#for future compatibility with Python 3-------------------------------------------------------------- -from __future__ import division, print_function, unicode_literals, absolute_import -#End compatibility block for Python 3---------------------------------------------------------------- - #External Modules------------------------------------------------------------------------------------ -import sys -import abc import copy import time import datetime #External Modules End-------------------------------------------------------------------------------- #Internal Modules------------------------------------------------------------------------------------ -from ..utils import utils -from ..BaseClasses import BaseType, MessageUser +from ..BaseClasses import MessageUser from .Error import Error #Internal Modules End-------------------------------------------------------------------------------- diff --git a/ravenframework/Samplers/Grid.py b/ravenframework/Samplers/Grid.py index 8aee4f482e..29a00c5f7c 100644 --- a/ravenframework/Samplers/Grid.py +++ b/ravenframework/Samplers/Grid.py @@ -138,8 +138,9 @@ def localGetCurrentSetting(self): and each parameter's initial value as the dictionary values """ paramDict = {} - for var, value in self.values.items(): - paramDict[f'coordinate {var} has value'] = value + # for var, value in self.values.items(): + # paramDict[f'coordinate {var} has value'] = value + # FIXME we don't have rlz yet return paramDict @@ -155,18 +156,19 @@ def localInitialize(self): self.gridEntity.initialize() self.limit = self.gridEntity.len() - def localGenerateInput(self, model, oldInput): + def localGenerateInput(self, rlz, model, oldInput): """ Function to select the next most informative point for refining the limit surface search. 
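For context on how those packed arguments are eventually consumed: the InternalRunner docstring above states that the stored tuple is unpacked as functionToRun(*args), which is why passing the unbound evaluateSample with the model instance as the first element works. A minimal sketch, with the real class's job bookkeeping omitted:

import copy

class InternalRunnerSketch:
  """Sketch: store a function plus its argument tuple and call functionToRun(*args)."""
  def __init__(self, functionArgs, functionToRun):
    self.args = copy.copy(functionArgs)
    self.functionToRun = functionToRun

  def run(self):
    # with an unbound method, args[0] plays the role of `self`
    return self.functionToRun(*self.args)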
- After this method is called, the self.inputInfo should be ready to be sent + After this method is called, the realization should be ready to be sent to the model + @ In, rlz, Realization, mapping from variables to values for sample @ In, model, model instance, an instance of a model @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) @ Out, None """ - self.inputInfo['distributionName'] = {} # Used to determine which distribution to change if needed. - self.inputInfo['distributionType'] = {} # Used to determine which distribution type is used + rlz.inputInfo['distributionName'] = {} # Used to determine which distribution to change if needed. + rlz.inputInfo['distributionType'] = {} # Used to determine which distribution type is used weight = 1.0 recastDict = {} for i in range(len(self.axisName)): @@ -208,10 +210,10 @@ def localGenerateInput(self, model, oldInput): # compute the SampledVarsPb for 1-D distribution if ("" in varName) or (self.variables2distributionsMapping[varName]['totDim'] == 1): for key in varName.strip().split(','): - self.inputInfo['distributionName'][key] = self.toBeSampled[varName] - self.inputInfo['distributionType'][key] = self.distDict[varName].type - self.values[key] = coordinates[varName] - self.inputInfo['SampledVarsPb'][key] = self.distDict[varName].pdf(self.values[key]) + rlz.inputInfo['distributionName'][key] = self.toBeSampled[varName] + rlz.inputInfo['distributionType'][key] = self.distDict[varName].type + rlz[key] = coordinates[varName] + rlz.inputInfo['SampledVarsPb'][key] = self.distDict[varName].pdf(rlz[key]) # compute the SampledVarsPb for N-D distribution else: if self.variables2distributionsMapping[varName]['reducedDim'] == 1: @@ -224,11 +226,11 @@ def localGenerateInput(self, model, oldInput): position = utils.first(var.values()) ndCoordinate[positionList.index(position)] = float(coordinates[variable.strip()]) for key in variable.strip().split(','): - self.inputInfo['distributionName'][key] = self.toBeSampled[variable] - self.inputInfo['distributionType'][key] = self.distDict[variable].type - self.values[key] = coordinates[variable] + rlz.inputInfo['distributionName'][key] = self.toBeSampled[variable] + rlz.inputInfo['distributionType'][key] = self.distDict[variable].type + rlz[key] = coordinates[variable] # Based on the discussion with Diego, we will use the following to compute SampledVarsPb. 
- self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinate) + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinate) # Compute the ProbabilityWeight if ("" in varName) or (self.variables2distributionsMapping[varName]['totDim']==1): if self.distDict[varName].getDistType() == 'Discrete': @@ -236,28 +238,28 @@ def localGenerateInput(self, model, oldInput): else: if self.gridInfo[varName]=='CDF': if coordinatesPlusOne[varName] != sys.maxsize and coordinatesMinusOne[varName] != -sys.maxsize: - midPlusCDF = (coordinatesPlusOne[varName]+self.distDict[varName].cdf(self.values[key]))/2.0 - midMinusCDF = (coordinatesMinusOne[varName]+self.distDict[varName].cdf(self.values[key]))/2.0 + midPlusCDF = (coordinatesPlusOne[varName]+self.distDict[varName].cdf(rlz[key]))/2.0 + midMinusCDF = (coordinatesMinusOne[varName]+self.distDict[varName].cdf(rlz[key]))/2.0 if coordinatesMinusOne[varName] == -sys.maxsize: - midPlusCDF = (coordinatesPlusOne[varName]+self.distDict[varName].cdf(self.values[key]))/2.0 + midPlusCDF = (coordinatesPlusOne[varName]+self.distDict[varName].cdf(rlz[key]))/2.0 midMinusCDF = 0.0 if coordinatesPlusOne[varName] == sys.maxsize: midPlusCDF = 1.0 - midMinusCDF = (coordinatesMinusOne[varName]+self.distDict[varName].cdf(self.values[key]))/2.0 + midMinusCDF = (coordinatesMinusOne[varName]+self.distDict[varName].cdf(rlz[key]))/2.0 gridWeight = midPlusCDF - midMinusCDF else: # Value if coordinatesPlusOne[varName] != sys.maxsize and coordinatesMinusOne[varName] != -sys.maxsize: - midPlusValue = (self.values[key]+coordinatesPlusOne[varName])/2.0 - midMinusValue = (self.values[key]+coordinatesMinusOne[varName])/2.0 + midPlusValue = (rlz[key]+coordinatesPlusOne[varName])/2.0 + midMinusValue = (rlz[key]+coordinatesMinusOne[varName])/2.0 gridWeight = self.distDict[varName].cdf(midPlusValue) - self.distDict[varName].cdf(midMinusValue) if coordinatesMinusOne[varName] == -sys.maxsize: - midPlusValue = (self.values[key]+coordinatesPlusOne[varName])/2.0 + midPlusValue = (rlz[key]+coordinatesPlusOne[varName])/2.0 gridWeight = self.distDict[varName].cdf(midPlusValue) - 0.0 if coordinatesPlusOne[varName] == sys.maxsize: - midMinusValue = (self.values[key]+coordinatesMinusOne[varName])/2.0 + midMinusValue = (rlz[key]+coordinatesMinusOne[varName])/2.0 gridWeight = 1.0 - self.distDict[varName].cdf(midMinusValue) - self.inputInfo['ProbabilityWeight-'+varName] = gridWeight + rlz.inputInfo['ProbabilityWeight-'+varName] = gridWeight weight *= gridWeight # ND variable else: @@ -294,11 +296,11 @@ def localGenerateInput(self, model, oldInput): if coordinatesPlusOne[variable] == sys.maxsize: dxs[positionList.index(position)] = self.distDict[varName].returnUpperBound(positionList.index(position)) - (coordinates[variable.strip()]+coordinatesMinusOne[variable])/2.0 ndCoordinate[positionList.index(position)] = (self.distDict[varName].returnUpperBound(positionList.index(position)) + (coordinates[variable.strip()]+coordinatesMinusOne[variable])/2.0) /2.0 - self.inputInfo['ProbabilityWeight-'+distName] = self.distDict[varName].cellIntegral(ndCoordinate,dxs) + rlz.inputInfo['ProbabilityWeight-'+distName] = self.distDict[varName].cellIntegral(ndCoordinate,dxs) weight *= self.distDict[varName].cellIntegral(ndCoordinate,dxs) - self.inputInfo['PointProbability' ] = reduce(mul, self.inputInfo['SampledVarsPb'].values()) - self.inputInfo['ProbabilityWeight'] = copy.deepcopy(weight) - self.inputInfo['SamplerType'] = 'Grid' + rlz.inputInfo['PointProbability' ] = reduce(mul, 
rlz.inputInfo['SampledVarsPb'].values()) + rlz.inputInfo['ProbabilityWeight'] = copy.deepcopy(weight) + rlz.inputInfo['SamplerType'] = 'Grid' def flush(self): """ diff --git a/ravenframework/Samplers/Sampler.py b/ravenframework/Samplers/Sampler.py index 26fe5aff6f..ae10908530 100644 --- a/ravenframework/Samplers/Sampler.py +++ b/ravenframework/Samplers/Sampler.py @@ -204,47 +204,49 @@ def __init__(self): @ Out, None """ super().__init__() - self.batch = 1 # determines the size of each sampling batch to run - self.onlySampleAfterCollecting = True # if True, then no new samples unless collection has occurred - self.ableToHandelFailedRuns = False # is this sampler able to handle failed runs? - self.counter = 0 # Counter of the samples performed (better the input generated!!!). It is reset by calling the function self.initialize - self.auxcnt = 0 # Aux counter of samples performed (for its usage check initialize method) - self.limit = sys.maxsize # maximum number of Samples (for example, Monte Carlo = Number of HistorySet to run, DET = Unlimited) - self.toBeSampled = {} # Sampling mapping dictionary {'Variable Name':'name of the distribution'} - self.dependentSample = {} # Sampling mapping dictionary for dependent variables {'Variable Name':'name of the external function'} - self.distDict = {} # Contains the instance of the distribution to be used, it is created every time the sampler is initialized. keys are the variable names - self.funcDict = {} # Mapping between variable name and the a 2-element namedtuple namedtuple('func', ['methodName', 'instance']) containing: - # element 0 (methodName): name of the method in the function to be be invoked. Either the default "evaluate", or the function name - self.variableFunctionExecutionList = [] # This is an ordered sequence of functional variable - # (linked to functions) that need to be performed (in case of - # interdependency). This list is always created. If no interdependence - # is detected, the order is just random, otherwise the order is - # determined through graph theory. - # element 1 (instance): instance of the function to be used, it is created every time the sampler is initialized. - # TODO REMOVE self.values = {} # for each variable the current value {'var name':value} - self.ndVariables = {} # stores the dimensionality (names and shapes) of each variable by name, as tuple e.g. shape = (2,3) for [[#,#,#],[#,#,#]] - # TODO REMOVE self.inputInfo = {} # depending on the sampler several different type of keywarded information could be present only one is mandatory, see below - self.samplerInfo = {} # depending on the sampler several different type of keywarded information could be present only one is mandatory, see below - self.initSeed = None # if not provided the seed is randomly generated at the initialization of the sampler, the step can override the seed by sending in another one - # TODO REMOVE self.inputInfo['SampledVars' ] = self.values # this is the location where to get the values of the sampled variables - # TODO REMOVE self.inputInfo['SampledVarsPb' ] = {} # this is the location where to get the probability of the sampled variables - # TODO REMOVE self.inputInfo['crowDist'] = {} # Stores a dictionary that contains the information to create a crow distribution. Stored as a json object - self.constants = {} # In this dictionary - self.reseedAtEachIteration = False # Logical flag. 
True if every newer evaluation is performed after a new reseeding - self.printTag = self.type # prefix for all prints (sampler type) - - self.restartData = None # presampled points to restart from - self.restartTolerance = 1e-14 # strictness with which to find matches in the restart data - self.restartIsCompatible = None # flags restart as compatible with the sampling scheme (used to speed up checking) - self._jobsToEnd = [] # list of strings, containing job prefixes that should be cancelled. - - self.constantSourceData = None # dictionary of data objects from which constants can take values - self.constantSources = {} # storage for the way to obtain constant information - - self._endJobRunnable = sys.maxsize # max number of inputs creatable by the sampler right after a job ends (e.g., infinite for MC, 1 for Adaptive, etc) + ### COUNTERS AND FLAGS ### + self.batch = 1 # determines the size of each sampling batch to run + self.counter = 0 # Counter of the samples performed (better the input generated!!!). It is reset by calling the function self.initialize + self.auxcnt = 0 # Aux counter of samples performed (for its usage check initialize method) + self.limit = sys.maxsize # maximum number of Samples (for example, Monte Carlo = Number of HistorySet to run, DET = Unlimited) + self.initSeed = None # if not provided the seed is randomly generated at the initialization of the sampler, the step can override the seed by sending in another one + self.printTag = self.type # prefix for all prints (sampler type) + self.reseedAtEachIteration = False # Logical flag. True if every newer evaluation is performed after a new reseeding + self.onlySampleAfterCollecting = True # if True, then no new samples unless collection has occurred + self.ableToHandelFailedRuns = False # is this sampler able to handle failed runs? + + ### INFO DICTS ### + self.samplerInfo = { # depending on the sampler several different type of keywarded information could be present only one is mandatory, see below + 'crowDist': {}, # Stores a dictionary that contains the information to create a crow distribution. Stored as a json object + } + self.toBeSampled = {} # Sampling mapping dictionary {'Variable Name':'name of the distribution'} + self.distDict = {} # Contains the instance of the distribution to be used, it is created every time the sampler is initialized. keys are the variable names + self.dependentSample = {} # Sampling mapping dictionary for dependent variables {'Variable Name':'name of the external function'} + # element 0 (methodName): name of the method in the function to be be invoked. Either the default "evaluate", or the function name + self.ndVariables = {} # stores the dimensionality (names and shapes) of each variable by name, as tuple e.g. shape = (2,3) for [[#,#,#],[#,#,#]] + self.constants = {} # Unsampled constant variables mapped to values + self.constantSources = {} # storage for the way to obtain constant information + self.constantSourceData = None # dictionary of data objects from which constants can take values self.distributions2variablesIndexList = {} - ###### + ### FUNCTION EVALUATIONS ### + self.funcDict = {} # Mapping between variable name and the a 2-element namedtuple namedtuple('func', ['methodName', 'instance']) containing: + self.variableFunctionExecutionList = [] # This is an ordered sequence of functional variable + # (linked to functions) that need to be performed (in case of + # interdependency). This list is always created. 
If no interdependence + # is detected, the order is just random, otherwise the order is + # determined through graph theory. + # element 1 (instance): instance of the function to be used, it is created every time the sampler is initialized. + ### JOB MANAGEMENT ### + self._jobsToEnd = [] # list of strings, containing job prefixes that should be cancelled. + self._endJobRunnable = sys.maxsize # max number of inputs creatable by the sampler right after a job ends (e.g., infinite for MC, 1 for Adaptive, etc) + + ### RESTART DATA ### + self.restartData = None # presampled points to restart from + self.restartTolerance = 1e-14 # strictness with which to find matches in the restart data + self.restartIsCompatible = None # flags restart as compatible with the sampling scheme (used to speed up checking) + + ### ND MAPPING ### # for each variable 'varName' , the following informations are included: 'varName': {'dim': 1, 'reducedDim': 1,'totDim': 2, 'name': 'distName'} ; # dim = dimension of the variable; # reducedDim = dimension of the variable in the transformed space; @@ -254,18 +256,20 @@ def __init__(self): self.distributions2variablesMapping = {} # this dictionary contains a dictionary for each ND distribution (key). This latter dictionary contains the initialization parameters of the # ND inverseCDF ('initialGridDisc' and 'tolerance') - self.NDSamplingParams = {} - ###### + self.NDSamplingParams = {} + + ### PCA TRANSFORM ### + self.variablesTransformationDict = {} # for each variable 'modelName', the following informations are included: + # {'modelName': {latentVariables:[latentVar1, latentVar2, ...], manifestVariables:[manifestVar1,manifestVar2,...]}} + self.transformationMethod = {} # transformation method used in variablesTransformation node {'modelName':method} + self.entitiesToRemove = [] # This variable is used in order to make sure the transformation info is printed once in the output xml file. + + ### ASSEMBLING ### self.addAssemblerObject('Restart', InputData.Quantity.zero_to_infinity) self.addAssemblerObject('ConstantSource', InputData.Quantity.zero_to_infinity) - #used for PCA analysis - self.variablesTransformationDict = {} # for each variable 'modelName', the following informations are included: - # {'modelName': {latentVariables:[latentVar1, latentVar2, ...], manifestVariables:[manifestVar1,manifestVar2,...]}} - self.transformationMethod = {} # transformation method used in variablesTransformation node {'modelName':method} - self.entitiesToRemove = [] # This variable is used in order to make sure the transformation info is printed once in the output xml file. - def _generateDistributions(self, rlz, availableDist, availableFunc): + def _generateDistributions(self, availableDist, availableFunc): """ Generates the distributions and functions. 
@ In, availableDist, dict, dict of distributions @@ -274,11 +278,11 @@ def _generateDistributions(self, rlz, availableDist, availableFunc): """ if self.initSeed is not None: randomUtils.randomSeed(self.initSeed) - for key in self.toBeSampled: - if self.toBeSampled[key] not in availableDist: - self.raiseAnError(IOError, f'Distribution {self.toBeSampled[key]} not found among available distributions (check input)!') - self.distDict[key] = availableDist[self.toBeSampled[key]] - rlz.inputInfo['crowDist'][key] = json.dumps(rlz.distDict[key].getCrowDistDict()) + for var, dist in self.toBeSampled.items(): + if dist not in availableDist: + self.raiseAnError(IOError, f'Distribution "{dist}" not found among available distributions (check input)!') + self.distDict[var] = availableDist[dist] + self.samplerInfo['crowDist'][var] = json.dumps(self.distDict[var].getCrowDistDict()) for key, val in self.dependentSample.items(): if val not in availableFunc.keys(): self.raiseAnError(ValueError, f'Function {val} was not found among the available functions:', availableFunc.keys()) @@ -858,20 +862,20 @@ def localStillReady(self, ready): return ready - def _checkRestartForEvaluation(self): + def _checkRestartForEvaluation(self, rlz): """ Checks restart data object (if any) for matching realization. + @ In, rlz, Realization, realization to check for in restart @ In, None @ Out, index, int, index of matching realization in restart (None if not found) @ Out, inExisting, dict, matching realization (None if not found) """ #check if point already exists if self.restartData is not None: - index,inExisting = self.restartData.realization(matchDict=self.values, tol=self.restartTolerance, unpackXArray=True) + index,inExisting = self.restartData.realization(matchDict=rlz, tol=self.restartTolerance, unpackXArray=True) else: index = None inExisting = None - return index, inExisting def _constantVariables(self, rlzBatch): @@ -1075,7 +1079,8 @@ def generateInput(self, model, modelInput): @ Out, modelInput, potentially perturbed? original inputs for model, or None if taken from restart """ if model is not None: - model.getAdditionalInputEdits(rlz.inputInfo) + # FIXME does samplerInfo have all the information? It should ... + model.getAdditionalInputEdits(self.samplerInfo) ##### GENERATE SAMPLE ##### # instantiate a batch of data carrier realizations batchSize = self.getBatchSize() @@ -1097,15 +1102,15 @@ def generateInput(self, model, modelInput): # constants and functioned values self._constantVariables(rlzBatch) self._functionalVariables(rlzBatch) - # ND variables + # ND variables127G self._formNDVariables(rlzBatch) # merge sampler metadata for rlz in rlzBatch: rlz.inputInfo.update(self.samplerInfo) # reset distribution memory - for key in self.distDict: - if self.distDict[key].getMemory(): - self.distDict[key].reset() + for _, dist in self.distDict.items(): + if dist.getMemory(): + dist.reset() ##### CHECK RESTART ##### # check each rlz for restart, and if so, fill its values and submit it as complete for r, rlz in enumerate(rlzBatch): @@ -1129,12 +1134,11 @@ def generateInput(self, model, modelInput): # inputs = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('input')) # outputs = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('output')+self.restartData.getVars('indexes')) # TODO method for getting Realization object out of DataObjects? 
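Putting the pieces of the reworked generateInput above together: a realization batch is created, each realization is filled by the sampler, batch-wide constants, functions, and ND variables are applied, and the shared sampler metadata is merged in. A condensed sketch under those assumptions (the localGenerateInput call is inferred from the new per-sampler signatures, and the import path for RealizationBatch is assumed):

from ravenframework.Realizations import RealizationBatch  # assumed export of the new package

def generateBatchSketch(sampler, model, modelInput):
  """Sketch of the batch-oriented generateInput flow; not the full method."""
  batch = RealizationBatch(sampler.getBatchSize())
  for rlz in batch:
    sampler.localGenerateInput(rlz, model, modelInput)  # sampler fills rlz in place
  # batch-wide post-processing, as in the hunk above
  sampler._constantVariables(batch)
  sampler._functionalVariables(batch)
  sampler._formNDVariables(batch)
  for rlz in batch:
    rlz.inputInfo.update(sampler.samplerInfo)           # shared sampler metadata
  return batch, modelInput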
- varvals = dict((var, np.atleast_1d(inExisting[var])) for var in self.restartData.getVars()) - rlz.update(varvals) - rlz.isRestart = True + restartRlz = dict((var, np.atleast_1d(inExisting[var])) for var in self.restartData.getVars()) + rlz.setRestart(restartRlz) # END if restart # END loop over rlz for restart checking - return rlzBatch, None + return rlzBatch, modelInput def generateInputBatch(self, myInput, model, batchSize, projector=None): """ @@ -1146,7 +1150,7 @@ def generateInputBatch(self, myInput, model, batchSize, projector=None): @ In, projector, object, optional, used for adaptive sampling to provide the projection of the solution on the success metric @ Out, newInputs, list of list, list of the list of input sets """ - FIXME used? + FIXME # used? newInputs = [] while self.amIreadyToProvideAnInput() and (self.counter < batchSize): if projector is None: diff --git a/ravenframework/Steps/MultiRun.py b/ravenframework/Steps/MultiRun.py index b461e46d9a..7e237ef93e 100644 --- a/ravenframework/Steps/MultiRun.py +++ b/ravenframework/Steps/MultiRun.py @@ -73,7 +73,7 @@ def _initializeSampler(self, inDictionary): self.raiseADebug(f'for the role of sampler the item of class {inDictionary[self.samplerType].type} and name {inDictionary[self.samplerType].name} has been initialized') self.raiseADebug(f'Sampler initialization dictionary: {self._samplerInitDict}') - def _localInitializeStep(self, inDictionary): + def _localInitializeStep(self, stepEntities): """ This is the API for the local initialization of the children classes of step The inDictionary contains the instances (or list of instances if more than one is allowed) @@ -81,15 +81,21 @@ def _localInitializeStep(self, inDictionary): The role of _localInitializeStep is to call the initialize method if needed Remember after each initialization to put: self.raiseADebug('for the role "+key+" the item of class '+inDictionary['key'].type+' and name '+inDictionary['key'].name+' has been initialized') - @ In, inDictionary, dict, the initialization dictionary + @ In, stepEntities, dict, the entities from the Step definition @ Out, None """ - SingleRun._localInitializeStep(self, inDictionary) - # check that no input data objects are also used as outputs? 
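The restart branch above reduces to: look the candidate point up in the restart data within tolerance and, if a match exists, fill the realization from it and flag it so the model run is skipped. A minimal sketch using the calls shown in the diff (realization(matchDict=...), getVars, setRestart); the helper name is illustrative:

import numpy as np

def applyRestartIfAvailable(rlz, restartData, tol=1e-14):
  """Sketch: fill rlz from restart data when a matching point exists."""
  if restartData is None:
    return False
  _, existing = restartData.realization(matchDict=rlz, tol=tol, unpackXArray=True)
  if existing is None:
    return False
  rlz.setRestart(dict((var, np.atleast_1d(existing[var])) for var in restartData.getVars()))
  return rlz.isRestart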
- for out in inDictionary['Output']: + SingleRun._localInitializeStep(self, stepEntities) + # these all get re-used a lot, so assign shortcut labels + model = stepEntities['Model'] + inputs = stepEntities['Input'] + outputs = stepEntities['Output'] + sampler = stepEntities[self.samplerType] + jobHandler = stepEntities['jobHandler'] + # check that no input data objects are also used as outputs + for out in outputs: if out.type not in ['PointSet', 'HistorySet', 'DataSet']: continue - for inp in inDictionary['Input']: + for inp in inputs: if inp.type not in ['PointSet', 'HistorySet', 'DataSet']: continue if inp == out: @@ -97,35 +103,35 @@ def _localInitializeStep(self, inDictionary): + f'Step: "{self.name}", DataObject: "{out.name}"') self.counter = 0 self._samplerInitDict['externalSeeding'] = self.initSeed - self._initializeSampler(inDictionary) + self._initializeSampler(stepEntities) #generate lambda function list to collect the output without checking the type self._outputCollectionLambda = [] # set up output collection lambdas - for outIndex, output in enumerate(inDictionary['Output']): + for outIndex, output in enumerate(outputs): if not isinstance(output, OutStreamEntity): - if 'SolutionExport' in inDictionary and output.name == inDictionary['SolutionExport'].name: + if 'SolutionExport' in stepEntities and output.name == stepEntities['SolutionExport'].name: self._outputCollectionLambda.append((lambda x:None, outIndex)) else: - self._outputCollectionLambda.append( (lambda x: inDictionary['Model'].collectOutput(x[0],x[1]), outIndex) ) + self._outputCollectionLambda.append( (lambda x: model.collectOutput(x[0],x[1]), outIndex) ) else: self._outputCollectionLambda.append((lambda x: x[1].addOutput(), outIndex)) - self._registerMetadata(inDictionary) - self.raiseADebug(f'Generating input batch of size {inDictionary["jobHandler"].runInfoDict["batchSize"]}') + self._registerMetadata(stepEntities) + self.raiseADebug(f'Generating input batch of size {jobHandler.runInfoDict["batchSize"]}') # set up and run the first batch of samples # FIXME this duplicates a lot of code from _locatTakeAstepRun, which should be consolidated # first, check and make sure the model is ready - model = inDictionary['Model'] if isinstance(model,Models.ROM): if not model.amITrained: model.raiseAnError(RuntimeError, f'ROM model "{model.name}" has not been trained yet, so it cannot be sampled!'+\ ' Use a RomTrainer step to train it.') - for inputIndex in range(inDictionary['jobHandler'].runInfoDict['batchSize']): - if inDictionary[self.samplerType].amIreadyToProvideAnInput(): + # FIXME sample batching can give many runs, not necessarily matching (parallel) batchSize, so this "for" is misleading + for inputIndex in range(jobHandler.runInfoDict['batchSize']): + if sampler.amIreadyToProvideAnInput(): try: - newInput = self._findANewInputToRun(inDictionary[self.samplerType], inDictionary['Model'], inDictionary['Input'], inDictionary['Output'], inDictionary['jobHandler']) - if newInput is not None: - inDictionary["Model"].submit(newInput, inDictionary[self.samplerType].type, inDictionary['jobHandler'], **copy.deepcopy(inDictionary[self.samplerType].inputInfo)) - self.raiseAMessage(f'Submitted input {inputIndex+1}') + batch, modelInp = sampler.generateInput(model, inputs) + # OLD batch, modelInp = self._findANewInputToRun(inDictionary[self.samplerType], inDictionary['Model'], inDictionary['Input'], inDictionary['Output'], inDictionary['jobHandler']) + model.submit(batch, modelInp, sampler.type, jobHandler) + 
self.raiseAMessage(f'Submitted input batch {inputIndex+1}') except utils.NoMoreSamplesNeeded: self.raiseAMessage('Sampler returned "NoMoreSamplesNeeded". Continuing...') @@ -287,52 +293,28 @@ def _addNewRuns(self, sampler, model, inputs, outputs, jobHandler, inDictionary, @ Out, None """ isEnsemble = isinstance(model, Models.EnsembleModel) + if verbose: + self.raiseADebug('Testing if the sampler is ready to generate a new input') # In order to ensure that the queue does not grow too large, we will # employ a threshold on the number of jobs the jobHandler can take, # in addition, we cannot provide more jobs than the sampler can provide. # So, we take the minimum of these two values. - if verbose: - self.raiseADebug('Testing if the sampler is ready to generate a new input') for _ in range(min(jobHandler.availability(isEnsemble), sampler.endJobRunnable())): if sampler.amIreadyToProvideAnInput(): try: - newInput = self._findANewInputToRun(sampler, model, inputs, outputs, jobHandler) - if newInput is not None: - model.submit(newInput, inDictionary[self.samplerType].type, jobHandler, **copy.deepcopy(sampler.inputInfo)) + batch, modelInp = sampler.generateInput(model, inputs) + model.submit(batch, modelInp, inDictionary[self.samplerType].type, jobHandler) except utils.NoMoreSamplesNeeded: self.raiseAMessage(' ... Sampler returned "NoMoreSamplesNeeded". Continuing...') break else: if verbose: - self.raiseADebug(' ... sampler has no new inputs currently.') + self.raiseADebug(' ... sampler has no new inputs to provide yet.') break else: if verbose: self.raiseADebug(' ... no available JobHandler spots currently (or the Sampler is done.)') - def _findANewInputToRun(self, sampler, model, inputs, outputs, jobHandler): - """ - Repeatedly calls Sampler until a new run is found or "NoMoreSamplesNeeded" is raised. - @ In, sampler, Sampler, the sampler in charge of generating the sample - @ In, model, Model, the model in charge of evaluating the sample - @ In, inputs, object, the raven object used as the input in this step - (i.e., a DataObject, File, or Database, I guess? Maybe these should all - inherit from some base "Data" so that we can ensure a consistent - interface for these?) - @ In, outputs, object, the raven object used as the output in this step - (i.e., a DataObject, File, or Database, I guess? Maybe these should all - inherit from some base "Data" so that we can ensure a consistent - interface for these?) 
- @ In, jobHandler, object, the raven object used to handle jobs - @ Out, newInp, RealizationBatch, list containing the new inputs (or None if a restart) - """ - batch, modelInp = sampler.generateInput(model, inputs) - for rlz in batch: - if rlz.isRestart: - # "submit" the finished run - jobHandler.addFinishedJob(rlz, metadata=rlz.inputInfo) - return batch, modelInp - def flushStep(self): """ Reset Step attributes to allow rerunning a workflow From 93d429c2fe401912d376feba737a16070730460e Mon Sep 17 00:00:00 2001 From: talbpw Date: Wed, 6 Nov 2024 10:59:50 -0700 Subject: [PATCH 07/18] sampler restart tests working --- ravenframework/Models/ExternalModel.py | 5 +- ravenframework/Realizations/Realization.py | 18 +- ravenframework/Runners/InternalRunner.py | 4 - ravenframework/Runners/PassthroughRunner.py | 6 +- .../Samplers/AdaptiveDynamicEventTree.py | 77 +++---- ravenframework/Samplers/AdaptiveSampler.py | 8 +- ravenframework/Samplers/AdaptiveSobol.py | 33 +-- ravenframework/Samplers/AdaptiveSparseGrid.py | 25 +-- ravenframework/Samplers/CustomSampler.py | 54 ++--- ravenframework/Samplers/DynamicEventTree.py | 198 +++++++++--------- ravenframework/Samplers/EnsembleForward.py | 27 +-- ravenframework/Samplers/FactorialDesign.py | 17 +- ravenframework/Samplers/LimitSurfaceSearch.py | 71 +++---- .../Samplers/ResponseSurfaceDesign.py | 7 +- ravenframework/Samplers/Sampler.py | 13 +- ravenframework/Samplers/Sobol.py | 29 +-- .../Samplers/SparseGridCollocation.py | 27 +-- ravenframework/Samplers/Stratified.py | 65 +++--- 18 files changed, 353 insertions(+), 331 deletions(-) diff --git a/ravenframework/Models/ExternalModel.py b/ravenframework/Models/ExternalModel.py index 2ec421c89e..fb8c993c6f 100644 --- a/ravenframework/Models/ExternalModel.py +++ b/ravenframework/Models/ExternalModel.py @@ -324,10 +324,13 @@ def evaluateSample(self, myInput, samplerType, rlz): ## do it in this order to make sure only the right variables are overwritten ## first inRun, which has everything from self.* and Input[*] # FIXME should this be a proper Realization object? Should we update the one we already have? 
+ # -> NOTE that RAVEN dataobjects currently expect all the inputInfo keys in the sample space, + # so that's a big difference from a Realization object res = dict((var, np.atleast_1d(val)) for var, val in inRun.items()) ## then result, which has the expected outputs and possibly changed inputs res.update(dict((var, np.atleast_1d(val)) for var, val in result.items())) - ## then get the metadata from kwargs + ## then get the metadata, values from the input realization + rlzData = rlz.asDict() res.update(dict((var, np.atleast_1d(val)) for var, val in rlz.inputInfo.items())) ## then get the inputs from SampledVars (overwriting any other entries) res.update(dict((var, np.atleast_1d(val)) for var, val in rlz.items())) diff --git a/ravenframework/Realizations/Realization.py b/ravenframework/Realizations/Realization.py index 4bbf60adf3..9abad71246 100644 --- a/ravenframework/Realizations/Realization.py +++ b/ravenframework/Realizations/Realization.py @@ -14,7 +14,7 @@ """ Realizations carry sampled information between entities in RAVEN """ - +import numpy as np class Realization: """ A mapping container specifically for carrying data between entities in RAVEN, such @@ -30,7 +30,6 @@ def __init__(self): self._values = {} # mapping of variables to their values self.indexMap = {} # information about dimensionality of variables self.labels = {} # custom labels for tracking, set externally - self.batchSize = 0 # not a batch, easy way to check self.isRestart = False # True if model was not run, but data was taken from restart self.inputInfo = {'SampledVars': {}, # additional information about this realization 'SampledVarsPb': {}, # point probability information for this realization @@ -49,6 +48,21 @@ def setRestart(self, varVals): self.update(varVals) self.isRestart = True + def asDict(self): + """ + Collects all the information this Realization knows about and returns it + Also assures that all entries are at least 1d np arrays + @ In, None + @ Out, info, dict, all the things + """ + # TODO this is one-way, no easy way to unpack labels and input info back into rlz form + # TODO any deep copies needed? Let's assume no. + info = dict((var, np.atleast_1d(val)) for var, val in self._values.items()) + info['_indexMap'] = np.atleast_1d(self.indexMap) + info.update(dict((key, np.atleast_1d(val)) for key, val in self.inputInfo.items())) + info.update(dict((label, np.atleast_1d(val)) for label, val in self.labels.items())) + return info + ######## # # dict-like members diff --git a/ravenframework/Runners/InternalRunner.py b/ravenframework/Runners/InternalRunner.py index f2f88c5ce6..b65aadd4d0 100644 --- a/ravenframework/Runners/InternalRunner.py +++ b/ravenframework/Runners/InternalRunner.py @@ -40,10 +40,6 @@ def __init__(self, functionArgs, functionToRun, **kwargs): super().__init__(**kwargs) ## Other parameters passed at initialization - print('DEBUGG internalRunner functionArgs:') - print('DEBUGG ...', type(functionArgs)) - for x in functionArgs: - print('DEBUGG ... ...', x) self.args = copy.copy(functionArgs) self.functionToRun = functionToRun diff --git a/ravenframework/Runners/PassthroughRunner.py b/ravenframework/Runners/PassthroughRunner.py index d72a9ba891..f38d8ea7ff 100644 --- a/ravenframework/Runners/PassthroughRunner.py +++ b/ravenframework/Runners/PassthroughRunner.py @@ -15,7 +15,6 @@ Module for Passthrough Runner class, which skips evaluation. Used particularly for restarting Samplers from existing data currently. 
""" -import numpy as np from .Runner import Runner class PassthroughRunner(Runner): @@ -56,10 +55,7 @@ def getEvaluation(self): @ In, None @ Out, result, dict, results """ - result = {} - result.update(dict((key, np.atleast_1d(value)) for key, value in self._data['inputs'].items())) - result.update(dict((key, np.atleast_1d(value)) for key, value in self._data['outputs'].items())) - result.update(dict((key, np.atleast_1d(value)) for key, value in self._data['metadata'].items())) + result = self._data.asDict() return result def start(self): diff --git a/ravenframework/Samplers/AdaptiveDynamicEventTree.py b/ravenframework/Samplers/AdaptiveDynamicEventTree.py index e37ee36e55..7de724206b 100644 --- a/ravenframework/Samplers/AdaptiveDynamicEventTree.py +++ b/ravenframework/Samplers/AdaptiveDynamicEventTree.py @@ -139,10 +139,10 @@ def _checkIfStartAdaptive(self): if not self.startAdaptive: break - def _checkClosestBranch(self): + def _checkClosestBranch(self, rlz): """ Function that checks the closest branch already evaluated - @ In, None + @ In, rlz, Realization, dict-like object to fill with sample @ Out, returnTuple, tuple, closest branch info: - if self.hybridDETstrategy and branch found -> returnTuple = (valBranch,cdfValues,treer) - if self.hybridDETstrategy and branch not found -> returnTuple = (None,cdfValues,treer) @@ -156,7 +156,7 @@ def _checkClosestBranch(self): cdfValues = {} self.raiseADebug("Check for closest branch:") self.raiseADebug("_"*50) - for key,value in self.values.items(): + for key,value in rlz.items(): self.raiseADebug("Variable name : "+str(key)) self.raiseADebug("Distribution name: "+str(self.toBeSampled[key])) if key not in self.epistemicVariables.keys(): @@ -176,7 +176,7 @@ def _checkClosestBranch(self): for tree in self.TreeInfo.values(): epistemicVars = tree.getrootnode().get("hybridsamplerCoordinate")[0]['SampledVars'] for key in self.epistemicVariables.keys(): - compareDict[key] = utils.compare(epistemicVars[key],self.values[key]) + compareDict[key] = utils.compare(epistemicVars[key],rlz[key]) if all(compareDict.values()): # we found the right epistemic tree self.foundEpistemicTree, treer = True, tree @@ -259,9 +259,10 @@ def _retrieveBranchInfo(self,branch): info['parentNode'] = branch return info - def _constructEndInfoFromBranch(self,model, myInput, info, cdfValues): + def _constructEndInfoFromBranch(self, rlz, model, myInput, info, cdfValues): """ Method to construct the end information from the 'info' inputted + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, Models object, the model that is used to explore the input space (e.g. 
a code, like RELAP-7) @ In, myInput, list, list of inputs for the Models object (passed through the Steps XML block) @ In, info, dict, dictionary of information at the end of a branch (information collected by the method _retrieveBranchInfo) @@ -269,7 +270,6 @@ def _constructEndInfoFromBranch(self,model, myInput, info, cdfValues): @ Out, None """ endInfo = info['parentNode'].get('endInfo') - #del self.inputInfo self.counter += 1 self.branchCountOnLevel = info['actualBranchOnLevel']+1 # Get Parent node name => the branch name is creating appending to this name a comma and self.branchCountOnLevel counter @@ -318,7 +318,7 @@ def _constructEndInfoFromBranch(self,model, myInput, info, cdfValues): info['parentNode'].appendBranch(subGroup) # Fill the values dictionary that will be passed into the model in order to create an input # In this dictionary the info for changing the original input is stored - self.inputInfo.update({'prefix':rname,'endTimeStep':info['parentNode'].get('actualEndTimeStep'), + rlz.inputInfo.update({'prefix':rname,'endTimeStep':info['parentNode'].get('actualEndTimeStep'), 'branchChangedParam':subGroup.get('branchChangedParam'), 'branchChangedParamValue':subGroup.get('branchChangedParamValue'), 'conditionalPb':subGroup.get('conditionalPb'), @@ -333,35 +333,35 @@ def _constructEndInfoFromBranch(self,model, myInput, info, cdfValues): # it exists only in case an hybridDET strategy is activated precSampled = info['parentNode'].get('hybridsamplerCoordinate') if precSampled: - self.inputInfo['hybridsamplerCoordinate' ] = copy.deepcopy(precSampled) + rlz.inputInfo['hybridsamplerCoordinate' ] = copy.deepcopy(precSampled) subGroup.add('hybridsamplerCoordinate', copy.copy(precSampled)) # The probability Thresholds are stored here in the cdfValues dictionary... 
We are sure that they are whitin the ones defined in the grid # check is not needed - self.inputInfo['initiatorDistribution' ] = [self.toBeSampled[key] for key in cdfValues.keys()] - self.inputInfo['PbThreshold' ] = list(cdfValues.values()) - self.inputInfo['ValueThreshold' ] = [self.distDict[key].ppf(value) for key,value in cdfValues.items()] - self.inputInfo['SampledVars' ] = {} - self.inputInfo['SampledVarsPb' ] = {} + rlz.inputInfo['initiatorDistribution' ] = [self.toBeSampled[key] for key in cdfValues.keys()] + rlz.inputInfo['PbThreshold' ] = list(cdfValues.values()) + rlz.inputInfo['ValueThreshold' ] = [self.distDict[key].ppf(value) for key,value in cdfValues.items()] + rlz.inputInfo['SampledVars' ] = {} + rlz.inputInfo['SampledVarsPb' ] = {} for varname in self.standardDETvariables: - self.inputInfo['SampledVars' ][varname] = self.distDict[varname].ppf(cdfValues[varname]) - self.inputInfo['SampledVarsPb'][varname] = cdfValues[varname] + rlz.inputInfo['SampledVars' ][varname] = self.distDict[varname].ppf(cdfValues[varname]) + rlz.inputInfo['SampledVarsPb'][varname] = cdfValues[varname] # constant variables - self._constantVariables() + self._constantVariables(rlz) if precSampled: for precSample in precSampled: - self.inputInfo['SampledVars' ].update(precSample['SampledVars']) - self.inputInfo['SampledVarsPb'].update(precSample['SampledVarsPb']) + rlz.inputInfo['SampledVars' ].update(precSample['SampledVars']) + rlz.inputInfo['SampledVarsPb'].update(precSample['SampledVarsPb']) pointPb = reduce(mul,[it for sub in [pre['SampledVarsPb'].values() for pre in precSampled ] for it in sub] if precSampled else [1.0]) - self.inputInfo['PointProbability' ] = pointPb*subGroup.get('conditionalPb') - self.inputInfo['ProbabilityWeight'] = self.inputInfo['PointProbability' ] - self.inputInfo.update({'ProbabilityWeight-'+key.strip():value for key,value in self.inputInfo['SampledVarsPb'].items()}) + rlz.inputInfo['PointProbability' ] = pointPb*subGroup.get('conditionalPb') + rlz.inputInfo['ProbabilityWeight'] = rlz.inputInfo['PointProbability' ] + rlz.inputInfo.update({'ProbabilityWeight-'+key.strip():value for key,value in rlz.inputInfo['SampledVarsPb'].items()}) # add additional edits if needed - model.getAdditionalInputEdits(self.inputInfo) + model.getAdditionalInputEdits(rlz.inputInfo) # Add the new input path into the RunQueue system - newInputs = {'args':[str(self.type)], 'kwargs': dict(self.inputInfo)} + newInputs = {'args':[str(self.type)], 'kwargs': dict(rlz.inputInfo)} self.RunQueue['queue'].append(newInputs) - self.RunQueue['identifiers'].append(self.inputInfo['prefix']) - for key,value in self.inputInfo.items(): + self.RunQueue['identifiers'].append(rlz.inputInfo['prefix']) + for key,value in rlz.inputInfo.items(): subGroup.add(key,copy.copy(value)) if endInfo: subGroup.add('endInfo',copy.deepcopy(endInfo)) @@ -403,24 +403,25 @@ def localStillReady(self,ready): return False return detReady - def localGenerateInput(self,model,myInput): + def localGenerateInput(self, rlz, model, myInput): """ Function to select the next most informative point for refining the limit surface search. - After this method is called, the self.inputInfo should be ready to be sent + After this method is called, the rlz.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, model instance, an instance of a model @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) 
@ Out, None """ - if self.startAdaptive == True and self.adaptiveReady == True: - LimitSurfaceSearch.localGenerateInput(self,model,myInput) + if self.startAdaptive and self.adaptiveReady: + LimitSurfaceSearch.localGenerateInput(self, rlz, model, myInput) #the adaptive sampler created the next point sampled vars #find the closest branch if self.hybridDETstrategy is not None: - closestBranch, cdfValues, treer = self._checkClosestBranch() + closestBranch, cdfValues, treer = self._checkClosestBranch(rlz) else: - closestBranch, cdfValues = self._checkClosestBranch() + closestBranch, cdfValues = self._checkClosestBranch(rlz) if closestBranch is None: self.raiseADebug('An usable branch for next candidate has not been found => create a parallel branch!') # add pbthresholds in the grid @@ -438,7 +439,7 @@ def localGenerateInput(self,model,myInput): self.investigatedPoints.append(investigatedPoint) if closestBranch: info = self._retrieveBranchInfo(closestBranch) - self._constructEndInfoFromBranch(model, myInput, info, cdfValues) + self._constructEndInfoFromBranch(rlz, model, myInput, info, cdfValues) else: # create a new tree, since there are no branches that are close enough to the adaptive request elm = ETS.HierarchicalNode(self.name + '_' + str(len(self.TreeInfo.keys())+1)) @@ -464,15 +465,15 @@ def localGenerateInput(self,model,myInput): for hybridStrategy in hybridSampled: for key in self.epistemicVariables.keys(): if key in hybridStrategy['SampledVars'].keys(): - self.raiseADebug("epistemic var " + str(key)+" value = "+str(self.values[key])) - hybridStrategy['SampledVars'][key] = copy.copy(self.values[key]) - hybridStrategy['SampledVarsPb'][key] = self.distDict[key].pdf(self.values[key]) + self.raiseADebug("epistemic var " + str(key)+" value = "+str(rlz[key])) + hybridStrategy['SampledVars'][key] = copy.copy(rlz[key]) + hybridStrategy['SampledVarsPb'][key] = self.distDict[key].pdf(rlz[key]) hybridStrategy['prefix'] = len(self.TreeInfo.values())+1 # TODO: find a strategy to recompute the probability weight here (for now == PointProbability) - hybridStrategy['PointProbability'] = reduce(mul, self.inputInfo['SampledVarsPb'].values()) - hybridStrategy['ProbabilityWeight'] = reduce(mul, self.inputInfo['SampledVarsPb'].values()) + hybridStrategy['PointProbability'] = reduce(mul, rlz.inputInfo['SampledVarsPb'].values()) + hybridStrategy['ProbabilityWeight'] = reduce(mul, rlz.inputInfo['SampledVarsPb'].values()) elm.add('hybridsamplerCoordinate', hybridSampled) - self.inputInfo.update({'ProbabilityWeight-'+key.strip():value for key,value in self.inputInfo['SampledVarsPb'].items()}) + rlz.inputInfo.update({'ProbabilityWeight-'+key.strip():value for key,value in rlz.inputInfo['SampledVarsPb'].items()}) # Here it is stored all the info regarding the DET => we create the info for all the branchings and we store them self.TreeInfo[self.name + '_' + str(len(self.TreeInfo.keys())+1)] = ETS.HierarchicalTree(elm) self._createRunningQueueBeginOne(self.TreeInfo[self.name + '_' + str(len(self.TreeInfo.keys()))],branchedLevel, model,myInput) diff --git a/ravenframework/Samplers/AdaptiveSampler.py b/ravenframework/Samplers/AdaptiveSampler.py index ba01d3490d..4555dcaa11 100644 --- a/ravenframework/Samplers/AdaptiveSampler.py +++ b/ravenframework/Samplers/AdaptiveSampler.py @@ -91,17 +91,17 @@ def _registerSample(self, prefix, info): self.checkIdentifiersPresent(info) self._prefixToIdentifiers[prefix] = info - def _checkSample(self): + def _checkSample(self, rlz): """ Check sample consistency. 
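Across the samplers touched by this commit (Grid, the DET variants, the adaptive sparse-grid family), localGenerateInput now writes sampled values straight into the passed Realization and bookkeeping into rlz.inputInfo. A compact sketch of that recurring pattern for a one-dimensional variable (helper names are illustrative):

from functools import reduce
from operator import mul

def fillScalarVariable(rlz, sampler, varName, value):
  """Sketch: the sampled value goes into the Realization, probabilities into inputInfo."""
  rlz[varName] = value
  rlz.inputInfo['SampledVarsPb'][varName] = sampler.distDict[varName].pdf(value)
  rlz.inputInfo['ProbabilityWeight-' + varName] = rlz.inputInfo['SampledVarsPb'][varName]

def finalizeProbabilities(rlz, weight, samplerType):
  """Sketch: realization-level probability summaries, as in the hunks above."""
  rlz.inputInfo['PointProbability'] = reduce(mul, rlz.inputInfo['SampledVarsPb'].values())
  rlz.inputInfo['ProbabilityWeight'] = weight
  rlz.inputInfo['SamplerType'] = samplerType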
- @ In, None + @ In, rlz, Realization, dict-like object to fill with sample @ Out, None """ - Sampler._checkSample(self) + Sampler._checkSample(self, rlz) # make sure the prefix is registered for tracking # but if there's no identifying information, skip this check if self._registeredIdentifiers: - prefix = self.inputInfo['prefix'] + prefix = rlz.inputInfo['prefix'] if not prefix in self._prefixToIdentifiers: self.raiseAnError(RuntimeError, f'Prefix "{prefix}" has not been tracked in adaptive sampling!') diff --git a/ravenframework/Samplers/AdaptiveSobol.py b/ravenframework/Samplers/AdaptiveSobol.py index e42b20f9f0..f4c393082d 100644 --- a/ravenframework/Samplers/AdaptiveSobol.py +++ b/ravenframework/Samplers/AdaptiveSobol.py @@ -317,12 +317,13 @@ def localStillReady(self,ready): #otherwise, we can submit points! return True - def localGenerateInput(self,model,oldInput): + def localGenerateInput(self, rlz, model, oldInput): """ Function to select the next most informative point for refining the limit surface search. - After this method is called, the self.inputInfo should be ready to be sent + After this method is called, the rlz.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, model instance, an instance of a model @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) @ Out, None @@ -330,7 +331,7 @@ def localGenerateInput(self,model,oldInput): #note: pointsNeeded is the collection of points needed by sampler, # while neededPoints is just the reference point that needs running #if there's a point that THIS sampler needs, prioritize it - self.inputInfo['ProbabilityWeight'] = 1.0 + rlz.inputInfo['ProbabilityWeight'] = 1.0 if len(self.neededPoints)>0: pt = self.neededPoints.pop() #otherwise, take from the highest-impact sampler's needed points @@ -358,9 +359,9 @@ def localGenerateInput(self,model,oldInput): # compute the SampledVarsPb for 1-D distribution if self.variables2distributionsMapping[varName]['totDim'] == 1: for key in varName.strip().split(','): - self.values[key] = pt[v] - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(pt[v]) - self.inputInfo['ProbabilityWeight-'+varName] = self.inputInfo['SampledVarsPb'][varName] + rlz[key] = pt[v] + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(pt[v]) + rlz.inputInfo['ProbabilityWeight-'+varName] = rlz.inputInfo['SampledVarsPb'][varName] # compute the SampledVarsPb for N-D distribution elif self.variables2distributionsMapping[varName]['totDim'] > 1 and self.variables2distributionsMapping[varName]['reducedDim'] == 1: dist = self.variables2distributionsMapping[varName]['name'] @@ -379,12 +380,12 @@ def localGenerateInput(self,model,oldInput): else: self.raiseAnError(IOError,'The variables ' + var + ' listed in adaptive sobol sampler, but not used in the ROM!' 
) for key in var.strip().split(','): - self.values[key] = pt[location] - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinates) - self.inputInfo['ProbabilityWeight-'+dist] = self.inputInfo['SampledVarsPb'][varName] - self.inputInfo['ProbabilityWeight']*=self.inputInfo['ProbabilityWeight-'+dist] - self.inputInfo['PointProbability'] = reduce(mul,self.inputInfo['SampledVarsPb'].values()) - self.inputInfo['SamplerType'] = 'Adaptive Sparse Grids for Sobol' + rlz[key] = pt[location] + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinates) + rlz.inputInfo['ProbabilityWeight-'+dist] = rlz.inputInfo['SampledVarsPb'][varName] + rlz.inputInfo['ProbabilityWeight']*=rlz.inputInfo['ProbabilityWeight-'+dist] + rlz.inputInfo['PointProbability'] = reduce(mul,rlz.inputInfo['SampledVarsPb'].values()) + rlz.inputInfo['SamplerType'] = 'Adaptive Sparse Grids for Sobol' def _addPointToDataObject(self,subset,point): """ Adds a cut point to the data object for the subset sampler. @@ -525,14 +526,16 @@ def _earlyExit(self): #set up HDMRRom for training self._finalizeROM() - def _finalizeROM(self, rom=None, include=[]): + def _finalizeROM(self, rom=None, include=None): """ Delivers necessary structures to the HDMRRom object @ In, rom, HDMRRom object, optional, rom to finalize before training, defaults to target rom @ In, include, list[str], optional, subsets to optionally exclude from trimming @ Out, None """ - if rom == None: + if include is None: + include = [] + if rom is None: rom = self.ROM initDict = {'ROMs':None, # multitarget requires setting individually, below 'SG':self.SQs, @@ -544,7 +547,7 @@ def _finalizeROM(self, rom=None, include=[]): #initialize each HDMRRom object in the ROM initDict['ROMs'] = copy.deepcopy(self.ROMs) #remove unfinished subsets - for subset in self.ROMs.keys(): + for subset in self.ROMs: if subset not in self.useSet.keys() and subset not in include: del initDict['ROMs'][subset] rom.supervisedContainer[0].initialize(initDict) diff --git a/ravenframework/Samplers/AdaptiveSparseGrid.py b/ravenframework/Samplers/AdaptiveSparseGrid.py index 343a210317..01f764c04f 100644 --- a/ravenframework/Samplers/AdaptiveSparseGrid.py +++ b/ravenframework/Samplers/AdaptiveSparseGrid.py @@ -316,25 +316,26 @@ def localStillReady(self, ready, skipJobHandlerCheck=False): return True - def localGenerateInput(self, model, oldInput): + def localGenerateInput(self, rlz, model, oldInput): """ Function to select the next most informative point - After this method is called, the self.inputInfo should be ready to be sent + After this method is called, the rlz.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, model instance, an instance of a model @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) 
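The _finalizeROM signature change above replaces the mutable default include=[] with include=None plus an in-body default; with a mutable default, a single list object is shared across all calls, so state from one call can leak into the next. A minimal illustration of the pitfall being avoided (not RAVEN code):

def appendBad(entry, acc=[]):     # one shared list object for every call
  acc.append(entry)
  return acc

def appendGood(entry, acc=None):  # fresh list per call unless one is passed in
  if acc is None:
    acc = []
  acc.append(entry)
  return acc

assert appendBad('a') == ['a']
assert appendBad('b') == ['a', 'b']   # state leaked from the previous call
assert appendGood('a') == ['a']
assert appendGood('b') == ['b']       # independent calls stay independent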
@ Out, None """ - self.inputInfo['ProbabilityWeight'] = 1.0 + rlz.inputInfo['ProbabilityWeight'] = 1.0 pt = self.neededPoints.pop() self.submittedNotCollected.append(pt) for v, varName in enumerate(self.sparseGrid.varNames): # compute the SampledVarsPb for 1-D distribution if self.variables2distributionsMapping[varName]['totDim'] == 1: for key in varName.strip().split(','): - self.values[key] = pt[v] - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(pt[v]) - self.inputInfo['ProbabilityWeight-'+varName] = self.inputInfo['SampledVarsPb'][varName] + rlz[key] = pt[v] + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(pt[v]) + rlz.inputInfo['ProbabilityWeight-'+varName] = rlz.inputInfo['SampledVarsPb'][varName] # compute the SampledVarsPb for N-D distribution elif self.variables2distributionsMapping[varName]['totDim'] > 1 and self.variables2distributionsMapping[varName]['reducedDim'] == 1: dist = self.variables2distributionsMapping[varName]['name'] @@ -353,12 +354,12 @@ def localGenerateInput(self, model, oldInput): else: self.raiseAnError(IOError,'The variables ' + var + ' listed in sparse grid collocation sampler, but not used in the ROM!' ) for key in var.strip().split(','): - self.values[key] = pt[location] - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinates) - self.inputInfo[f'ProbabilityWeight-{dist}'] = self.inputInfo['SampledVarsPb'][varName] - self.inputInfo['ProbabilityWeight'] *= self.inputInfo[f'ProbabilityWeight-{dist}'] - self.inputInfo['PointProbability'] = reduce(mul,self.inputInfo['SampledVarsPb'].values()) - self.inputInfo['SamplerType'] = self.type + rlz[key] = pt[location] + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinates) + rlz.inputInfo[f'ProbabilityWeight-{dist}'] = rlz.inputInfo['SampledVarsPb'][varName] + rlz.inputInfo['ProbabilityWeight'] *= rlz.inputInfo[f'ProbabilityWeight-{dist}'] + rlz.inputInfo['PointProbability'] = reduce(mul,rlz.inputInfo['SampledVarsPb'].values()) + rlz.inputInfo['SamplerType'] = self.type def localFinalizeActualSampling(self, jobObject, model, myInput): """ diff --git a/ravenframework/Samplers/CustomSampler.py b/ravenframework/Samplers/CustomSampler.py index 15b6f1f05c..eb607a38b3 100644 --- a/ravenframework/Samplers/CustomSampler.py +++ b/ravenframework/Samplers/CustomSampler.py @@ -231,23 +231,26 @@ def localInitialize(self): if self.batch > 1: self.addMetaKeys(["batchId"]) - def localGenerateInput(self, model, myInput): + def localGenerateInput(self, rlzBatch, model, myInput): """ Function to select the next most informative point for refining the limit surface search. - After this method is called, the self.inputInfo should be ready to be sent + After this method is called, the rlz.inputInfo should be ready to be sent to the model + @ In, rlzBatch, RealizationBatch, dict-like object to fill with sample @ In, model, model instance, an instance of a model @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) 
@ Out, None """ if self.batch > 1: - self.inputInfo['batchMode'] = True + # OLD rlz.inputInfo['batchMode'] = True + batchMode = True batchData = [] self.batchId += 1 else: - self.inputInfo['batchMode'] = False - for _ in range(self.batch): + # OLD rlz.inputInfo['batchMode'] = False + batchMode = False + for rlz in rlzBatch: if self.indexes is None: index = self.counter - 1 else: @@ -256,23 +259,23 @@ def localGenerateInput(self, model, myInput): break if self.readingFrom == 'DataObject': # data is stored as slices of a data object, so take from that - rlz = self.pointsToSample[index] + rlzData = self.pointsToSample[index] for var in self.toBeSampled: for subVar in var.split(','): subVar = subVar.strip() sourceName = self.nameInSource[subVar] # get the value(s) for the variable for this realization - self.values[subVar] = mathUtils.npZeroDToEntry(rlz[sourceName].values) + rlz[subVar] = mathUtils.npZeroDToEntry(rlzData[sourceName].values) # get supporting indices (e.g. 'time') - for dim in rlz.dims: - if dim not in self.values: - self.values[dim] = rlz[self.nameInSource.get(dim, dim)] + for dim in rlzData.dims: + if dim not in rlz: + rlz[dim] = rlzData[self.nameInSource.get(dim, dim)] # set the probability weight due to this variable (default to 1) pbWtName = 'ProbabilityWeight-' - self.inputInfo[pbWtName+subVar] = rlz.get(pbWtName+sourceName,1.0) + rlz.inputInfo[pbWtName+subVar] = rlzData.get(pbWtName+sourceName,1.0) # get realization-level required meta information, or default to 1 for meta in ['PointProbability', 'ProbabilityWeight']: - self.inputInfo[meta] = rlz.get(meta, 1.0) + rlz.inputInfo[meta] = rlzData.get(meta, 1.0) elif self.readingFrom == 'File': # data is stored in file, so we already parsed the values # create values dictionary @@ -280,21 +283,24 @@ def localGenerateInput(self, model, myInput): for subVar in var.split(','): subVar = subVar.strip() # assign the custom sampled variables values to the sampled variables - self.values[subVar] = self.pointsToSample[subVar][index] + rlz[subVar] = self.pointsToSample[subVar][index] # This is the custom sampler, assign the ProbabilityWeights based on the provided values - self.inputInfo[f'ProbabilityWeight-{subVar}'] = self.infoFromCustom[f'ProbabilityWeight-{subVar}'][index] + rlz.inputInfo[f'ProbabilityWeight-{subVar}'] = self.infoFromCustom[f'ProbabilityWeight-{subVar}'][index] # Construct probabilities based on the user provided information - self.inputInfo['PointProbability'] = self.infoFromCustom['PointProbability'][index] - self.inputInfo['ProbabilityWeight'] = self.infoFromCustom['ProbabilityWeight'][index] - self.values['_indexMap'] = self.sourceIndexMap - self.inputInfo['SamplerType'] = 'Custom' - if self.inputInfo['batchMode']: - self.inputInfo['SampledVars'] = self.values - self.inputInfo['batchId'] = self.name + str(self.batchId) - batchData.append(copy.deepcopy(self.inputInfo)) + rlz.inputInfo['PointProbability'] = self.infoFromCustom['PointProbability'][index] + rlz.inputInfo['ProbabilityWeight'] = self.infoFromCustom['ProbabilityWeight'][index] + rlz.indexMap = self.sourceIndexMap + rlz.inputInfo['SamplerType'] = 'Custom' + # if rlz.inputInfo['batchMode']: + # rlz.inputInfo['SampledVars'] = rlz + # rlz.inputInfo['batchId'] = self.name + str(self.batchId) + # batchData.append(copy.deepcopy(rlz.inputInfo)) + if batchMode: self._incrementCounter() - if self.inputInfo['batchMode']: - self.inputInfo['batchInfo'] = {'nRuns': self.batch, 'batchRealizations': batchData, 'batchId': self.name + str(self.batchId)} + if batchMode: 
+      rlzBatch.ID = self.name + str(self.batchId)
+      # FIXME where does this data go? Fix along with GA
+      #rlzBatch.inputInfo['batchInfo'] = {'nRuns': self.batch, 'batchRealizations': batchData, 'batchId': self.name + str(self.batchId)}

   def flush(self):
     """
diff --git a/ravenframework/Samplers/DynamicEventTree.py b/ravenframework/Samplers/DynamicEventTree.py
index 1902ed8873..55ea3223b0 100644
--- a/ravenframework/Samplers/DynamicEventTree.py
+++ b/ravenframework/Samplers/DynamicEventTree.py
@@ -40,6 +40,7 @@
 from ..utils import utils
 from ..utils import InputData, InputTypes
 from ..utils import TreeStructure as ETS
+from ..Realizations import RealizationBatch
 #Internal Modules End-------------------------------------------------------------------------------

 class DynamicEventTree(Grid):
@@ -325,7 +326,7 @@ def localFinalizeActualSampling(self,jobObject,model,myInput,genRunQueue=True):

     # Create the inputs and put them in the runQueue dictionary (if genRunQueue is true)
     if genRunQueue:
-      self._createRunningQueue(model,myInput)
+      self._createRunningQueue(rlz, model, myInput)

     self._endJobRunnable = len(self.RunQueue['identifiers'])

     return True
@@ -436,11 +437,12 @@ def __readBranchInfo(self,outBase=None,currentWorkingDir=None):
       branchPresent = True

     return branchPresent

-  def _createRunningQueueBeginOne(self,rootTree,branchedLevel, model,myInput):
+  def _createRunningQueueBeginOne(self, rlz, rootTree, branchedLevel, model, myInput):
     """
       Method to generate the running internal queue for one point in the epistemic space.
       It generates the initial information to instantiate the root of a
       Deterministic Dynamic Event Tree.
+      @ In, rlz, Realization, dict-like object to fill with sample
       @ In, rootTree, Node object, the rootTree of the single coordinate in the epistemic space.
       @ In, branchedLevel, dict, dictionary of the levels reached by the rootTree mapped in the internal grid dictionary (self.branchProbabilities)
       @ In, model, Models object, the model that is used to explore the input space (e.g.
a code, like RELAP-7) @@ -448,76 +450,77 @@ def _createRunningQueueBeginOne(self,rootTree,branchedLevel, model,myInput): @ Out, None """ # add additional edits if needed - model.getAdditionalInputEdits(self.inputInfo) + model.getAdditionalInputEdits(rlz.inputInfo) precSampled = rootTree.getrootnode().get('hybridsamplerCoordinate') rootnode = rootTree.getrootnode() rname = rootnode.name rootnode.add('completedHistory', False) # Fill th values dictionary in if precSampled: - self.inputInfo['hybridsamplerCoordinate' ] = copy.deepcopy(precSampled) - self.inputInfo['prefix' ] = rname - self.inputInfo['standardDETvariables' ] = self.standardDETvariables - self.inputInfo['initiatorDistribution' ] = [] - self.inputInfo['triggeredVariable' ] = 'None' - self.inputInfo['happenedEventVarHistory' ] = [] - self.inputInfo['PbThreshold' ] = [] - self.inputInfo['ValueThreshold' ] = [] - self.inputInfo['branchChangedParam' ] = ['None'] - self.inputInfo['branchChangedParamValue' ] = ['None'] - self.inputInfo['startTime' ] = -sys.float_info.max - self.inputInfo['endTimeStep' ] = 0 - self.inputInfo['RAVEN_parentID' ] = "None" - self.inputInfo['RAVEN_isEnding' ] = True - self.inputInfo['conditionalPb' ] = 1.0 - self.inputInfo['happenedEvent' ] = False + rlz.inputInfo['hybridsamplerCoordinate' ] = copy.deepcopy(precSampled) + rlz.inputInfo['prefix' ] = rname + rlz.inputInfo['standardDETvariables' ] = self.standardDETvariables + rlz.inputInfo['initiatorDistribution' ] = [] + rlz.inputInfo['triggeredVariable' ] = 'None' + rlz.inputInfo['happenedEventVarHistory' ] = [] + rlz.inputInfo['PbThreshold' ] = [] + rlz.inputInfo['ValueThreshold' ] = [] + rlz.inputInfo['branchChangedParam' ] = ['None'] + rlz.inputInfo['branchChangedParamValue' ] = ['None'] + rlz.inputInfo['startTime' ] = -sys.float_info.max + rlz.inputInfo['endTimeStep' ] = 0 + rlz.inputInfo['RAVEN_parentID' ] = "None" + rlz.inputInfo['RAVEN_isEnding' ] = True + rlz.inputInfo['conditionalPb' ] = 1.0 + rlz.inputInfo['happenedEvent' ] = False for key in self.branchProbabilities.keys(): - self.inputInfo['initiatorDistribution'].append(self.toBeSampled[key]) + rlz.inputInfo['initiatorDistribution'].append(self.toBeSampled[key]) for key in self.branchProbabilities.keys(): - self.inputInfo['PbThreshold'].append(self.branchProbabilities[key][branchedLevel[key]]) + rlz.inputInfo['PbThreshold'].append(self.branchProbabilities[key][branchedLevel[key]]) for key in self.branchProbabilities.keys(): - self.inputInfo['ValueThreshold'].append(self.branchValues[key][branchedLevel[key]]) + rlz.inputInfo['ValueThreshold'].append(self.branchValues[key][branchedLevel[key]]) for varname in self.standardDETvariables: - self.inputInfo['SampledVars' ][varname] = self.branchValues[varname][branchedLevel[varname]] - self.inputInfo['SampledVarsPb'][varname] = self.branchProbabilities[varname][branchedLevel[varname] ] + rlz.inputInfo['SampledVars' ][varname] = self.branchValues[varname][branchedLevel[varname]] + rlz.inputInfo['SampledVarsPb'][varname] = self.branchProbabilities[varname][branchedLevel[varname] ] # constant variables self._constantVariables() if precSampled: for precSample in precSampled: - self.inputInfo['SampledVars' ].update(precSample['SampledVars']) - self.inputInfo['SampledVarsPb'].update(precSample['SampledVarsPb']) + rlz.inputInfo['SampledVars' ].update(precSample['SampledVars']) + rlz.inputInfo['SampledVarsPb'].update(precSample['SampledVarsPb']) pointPb = reduce(mul,[it for sub in [pre['SampledVarsPb'].values() for pre in precSampled ] for it in sub] if 
precSampled else [1.0])
-    self.inputInfo['PointProbability' ] = pointPb
-    self.inputInfo['ProbabilityWeight'] = pointPb
-    self.inputInfo.update({'ProbabilityWeight-'+key.strip():value for key,value in self.inputInfo['SampledVarsPb'].items()})
+    rlz.inputInfo['PointProbability' ] = pointPb
+    rlz.inputInfo['ProbabilityWeight'] = pointPb
+    rlz.inputInfo.update({'ProbabilityWeight-'+key.strip():value for key,value in rlz.inputInfo['SampledVarsPb'].items()})
     ##### REDUNDANT FUNCTIONALS #####
-    self._functionalVariables()
+    self._functionalVariables(rlz) # TODO batch or single?
     if(self.maxSimulTime):
-      self.inputInfo['endTime'] = self.maxSimulTime
+      rlz.inputInfo['endTime'] = self.maxSimulTime
     # Add some useful variable naming in the input
-    self.inputInfo.update(self.__createVariablesInfoForKwargs(model))
+    rlz.inputInfo.update(self.__createVariablesInfoForKwargs(model))
     # Add the new input path into the RunQueue system
-    newInputs = {'args':[str(self.type)], 'kwargs':dict(self.inputInfo)}
-    for key,value in self.inputInfo.items():
+    newInputs = {'args':[str(self.type)], 'kwargs':dict(rlz.inputInfo)}
+    for key,value in rlz.inputInfo.items():
       rootnode.add(key,copy.copy(value))
     self.RunQueue['queue'].append(newInputs)
-    self.RunQueue['identifiers'].append(self.inputInfo['prefix'])
-    self.rootToJob[self.inputInfo['prefix']] = rname
+    self.RunQueue['identifiers'].append(rlz.inputInfo['prefix'])
+    self.rootToJob[rlz.inputInfo['prefix']] = rname
     del newInputs
     self.counter += 1

-  def _createRunningQueueBegin(self,model,myInput):
+  def _createRunningQueueBegin(self, rlz, model, myInput):
     """
       Method to generate the running internal queue for all the points in the epistemic space.
       It generates the initial information to instantiate the roots of all the N-D coordinates to construct multiple Deterministic Dynamic Event Trees.
+      @ In, rlz, Realization, dict-like object to fill with sample
       @ In, model, Models object, the model that is used to explore the input space (e.g. a code, like RELAP-7)
       @ In, myInput, list, list of inputs for the Models object (passed through the Steps XML block)
       @ Out, None
@@ -529,12 +532,13 @@ def _createRunningQueueBegin(self,model,myInput):
     # Get the initial branchedLevel dictionary (=> the list gets empty)
     branchedLevel = self.branchedLevel.pop(0)
     for rootTree in self.TreeInfo.values():
-      self._createRunningQueueBeginOne(rootTree,branchedLevel, model,myInput)
+      self._createRunningQueueBeginOne(rlz,rootTree,branchedLevel, model,myInput)

-  def _createRunningQueueBranch(self,model,myInput,forceEvent=False):
+  def _createRunningQueueBranch(self, rlz, model, myInput, forceEvent=False):
     """
       Method to generate the running internal queue right after a branch occurred
       It generates the information to instantiate the branches' continuation of the Deterministic Dynamic Event Tree
+      @ In, rlz, Realization, dict-like object to fill with sample
       @ In, model, Models object, the model that is used to explore the input space (e.g. a code, like RELAP-7)
       @ In, myInput, list, list of inputs for the Models object (passed through the Steps XML block)
       @ In, forceEvent, bool, if True the events are forced to happen (basically, the "unchanged event" is not created at all)
@@ -629,83 +633,83 @@ def _createRunningQueueBranch(self,model,myInput,forceEvent=False):
       endInfo['parentNode'].appendBranch(subGroup)
       # Fill the values dictionary that will be passed into the model in order to create an input
       # In this dictionary the info for changing the original input is stored
-      self.inputInfo['prefix'] = rname
-      self.inputInfo['standardDETvariables'] = self.standardDETvariables
-      self.inputInfo['endTimeStep'] = endInfo['endTimeStep']
-      self.inputInfo['branchChangedParam'] = subGroup.get('branchChangedParam')
-      self.inputInfo['branchChangedParamValue'] = subGroup.get('branchChangedParamValue')
-      self.inputInfo['conditionalPb'] = subGroup.get('conditionalPb')
-      self.inputInfo['startTime'] = endInfo['parentNode'].get('endTime')
-      self.inputInfo['RAVEN_parentID'] = subGroup.get('parent')
-      self.inputInfo['RAVEN_isEnding'] = True
+      rlz.inputInfo['prefix'] = rname
+      rlz.inputInfo['standardDETvariables'] = self.standardDETvariables
+      rlz.inputInfo['endTimeStep'] = endInfo['endTimeStep']
+      rlz.inputInfo['branchChangedParam'] = subGroup.get('branchChangedParam')
+      rlz.inputInfo['branchChangedParamValue'] = subGroup.get('branchChangedParamValue')
+      rlz.inputInfo['conditionalPb'] = subGroup.get('conditionalPb')
+      rlz.inputInfo['startTime'] = endInfo['parentNode'].get('endTime')
+      rlz.inputInfo['RAVEN_parentID'] = subGroup.get('parent')
+      rlz.inputInfo['RAVEN_isEnding'] = True
       #'RAVEN_parentID','RAVEN_isEnding'
-      self.inputInfo['happenedEvent'] = subGroup.get('happenedEvent')
-      self.inputInfo['happenedEventVarHistory'] = subGroup.get('happenedEventVarHistory')
+      rlz.inputInfo['happenedEvent'] = subGroup.get('happenedEvent')
+      rlz.inputInfo['happenedEventVarHistory'] = subGroup.get('happenedEventVarHistory')
       # add additional edits if needed
-      model.getAdditionalInputEdits(self.inputInfo)
+      model.getAdditionalInputEdits(rlz.inputInfo)
       # add the newer branch name to the map
       self.rootToJob[rname] = self.rootToJob[subGroup.get('parent')]
       # check if it is a preconditioned DET sampling, if so add the relative information
       precSampled = endInfo['parentNode'].get('hybridsamplerCoordinate')
       if precSampled:
-        self.inputInfo['hybridsamplerCoordinate'] = copy.deepcopy(precSampled)
+        rlz.inputInfo['hybridsamplerCoordinate'] = copy.deepcopy(precSampled)
         subGroup.add('hybridsamplerCoordinate', precSampled)
       # Check if the distribution that just triggered hit the last probability threshold.
# In this case there is not a probability threshold that needs to be added in the input # for this particular distribution if not (branchedLevel[endInfo['branchDist']] >= len(self.branchProbabilities[endInfo['branchDist']])): - self.inputInfo['initiatorDistribution' ] = [self.toBeSampled[endInfo['branchDist']]] - self.inputInfo['triggeredVariable' ] = endInfo['branchDist'] - self.inputInfo['PbThreshold' ] = [self.branchProbabilities[endInfo['branchDist']][branchedLevel[endInfo['branchDist']]]] - self.inputInfo['ValueThreshold' ] = [self.branchValues[endInfo['branchDist']][branchedLevel[endInfo['branchDist']]]] + rlz.inputInfo['initiatorDistribution' ] = [self.toBeSampled[endInfo['branchDist']]] + rlz.inputInfo['triggeredVariable' ] = endInfo['branchDist'] + rlz.inputInfo['PbThreshold' ] = [self.branchProbabilities[endInfo['branchDist']][branchedLevel[endInfo['branchDist']]]] + rlz.inputInfo['ValueThreshold' ] = [self.branchValues[endInfo['branchDist']][branchedLevel[endInfo['branchDist']]]] # For the other distributions, we put the unbranched thresholds # Before adding these thresholds, check if the keyword 'initiatorDistribution' is present... # (In the case the previous if statement is true, this keyword is not present yet # Add it otherwise - if not ('initiatorDistribution' in self.inputInfo.keys()): - self.inputInfo['initiatorDistribution' ] = [] - self.inputInfo['PbThreshold' ] = [] - self.inputInfo['ValueThreshold' ] = [] - self.inputInfo['triggeredVariable' ] = 'None' + if not ('initiatorDistribution' in rlz.inputInfo.keys()): + rlz.inputInfo['initiatorDistribution' ] = [] + rlz.inputInfo['PbThreshold' ] = [] + rlz.inputInfo['ValueThreshold' ] = [] + rlz.inputInfo['triggeredVariable' ] = 'None' # Add the unbranched thresholds for key in self.branchProbabilities.keys(): if not (key in self.toBeSampled[endInfo['branchDist']]) and (branchedLevel[key] < len(self.branchProbabilities[key])): - self.inputInfo['initiatorDistribution'].append(self.toBeSampled[key]) + rlz.inputInfo['initiatorDistribution'].append(self.toBeSampled[key]) for key in self.branchProbabilities.keys(): if not (key in self.toBeSampled[endInfo['branchDist']]) and (branchedLevel[key] < len(self.branchProbabilities[key])): - self.inputInfo['PbThreshold' ].append(self.branchProbabilities[key][branchedLevel[key]]) - self.inputInfo['ValueThreshold'].append(self.branchValues[key][branchedLevel[key]]) - self.inputInfo['SampledVars'] = {} - self.inputInfo['SampledVarsPb'] = {} + rlz.inputInfo['PbThreshold' ].append(self.branchProbabilities[key][branchedLevel[key]]) + rlz.inputInfo['ValueThreshold'].append(self.branchValues[key][branchedLevel[key]]) + rlz.inputInfo['SampledVars'] = {} + rlz.inputInfo['SampledVarsPb'] = {} for varname in self.standardDETvariables: - self.inputInfo['SampledVars'][varname] = self.branchValues[varname][branchedLevel[varname]] - self.inputInfo['SampledVarsPb'][varname] = self.branchProbabilities[varname][branchedLevel[varname]] - self._constantVariables() + rlz.inputInfo['SampledVars'][varname] = self.branchValues[varname][branchedLevel[varname]] + rlz.inputInfo['SampledVarsPb'][varname] = self.branchProbabilities[varname][branchedLevel[varname]] + self._constantVariables(rlz) if precSampled: for precSample in precSampled: - self.inputInfo['SampledVars' ].update(precSample['SampledVars']) - self.inputInfo['SampledVarsPb'].update(precSample['SampledVarsPb']) + rlz.inputInfo['SampledVars' ].update(precSample['SampledVars']) + rlz.inputInfo['SampledVarsPb'].update(precSample['SampledVarsPb']) pointPb 
= reduce(mul,[it for sub in [pre['SampledVarsPb'].values() for pre in precSampled ] for it in sub] if precSampled else [1.0]) - self.inputInfo['PointProbability' ] = pointPb*subGroup.get('conditionalPb') - self.inputInfo['ProbabilityWeight'] = self.inputInfo['PointProbability' ] - self.inputInfo.update({'ProbabilityWeight-'+key.strip():value for key,value in self.inputInfo['SampledVarsPb'].items()}) + rlz.inputInfo['PointProbability' ] = pointPb*subGroup.get('conditionalPb') + rlz.inputInfo['ProbabilityWeight'] = rlz.inputInfo['PointProbability' ] + rlz.inputInfo.update({'ProbabilityWeight-'+key.strip():value for key,value in rlz.inputInfo['SampledVarsPb'].items()}) ##### REDUNDANT FUNCTIONALS ##### - self._functionalVariables() + self._functionalVariables(rlz) # Add some useful variable naming in the input - self.inputInfo.update(self.__createVariablesInfoForKwargs(model)) + rlz.inputInfo.update(self.__createVariablesInfoForKwargs(model)) # Add the new input path into the RunQueue system - newInputs = {'args': [str(self.type)], 'kwargs':dict(self.inputInfo)} + newInputs = {'args': [str(self.type)], 'kwargs':dict(rlz.inputInfo)} self.RunQueue['queue'].append(newInputs) - self.RunQueue['identifiers'].append(self.inputInfo['prefix']) - for key,value in self.inputInfo.items(): + self.RunQueue['identifiers'].append(rlz.inputInfo['prefix']) + for key,value in rlz.inputInfo.items(): subGroup.add(key,copy.copy(value)) popped = endInfo.pop('parentNode') subGroup.add('endInfo',copy.deepcopy(endInfo)) endInfo['parentNode'] = popped del branchedLevel - def _createRunningQueue(self, model, myInput, forceEvent=False): + def _createRunningQueue(self, rlz, model, myInput, forceEvent=False): """ Function to create and append new inputs to the queue. It uses all the containers have been updated by the previous functions @ In, model, Model instance, model instance that can be a Code type, ROM, etc. @@ -720,7 +724,7 @@ def _createRunningQueue(self, model, myInput, forceEvent=False): self._createRunningQueueBranch(model, myInput, forceEvent) else: # We construct the input for the first DET branch calculation' - self._createRunningQueueBegin(model, myInput) + self._createRunningQueueBegin(rlz, model, myInput) return def __createVariablesInfoForKwargs(self, model): @@ -743,8 +747,8 @@ def __createVariablesInfoForKwargs(self, model): model._replaceVariablesNamesWithAliasSystem(consts) varInfo['DETVariables'] = list(standardDet) hvars = {} - if 'hybridsamplerCoordinate' in self.inputInfo: - for precSample in self.inputInfo['hybridsamplerCoordinate']: + if 'hybridsamplerCoordinate' in rlz.inputInfo: + for precSample in rlz.inputInfo['hybridsamplerCoordinate']: hvars.update(precSample['SampledVars']) model._replaceVariablesNamesWithAliasSystem(hvars) varInfo['HDETVariables'] = list(hvars.keys()) @@ -782,7 +786,7 @@ def __getQueueElement(self): return jobInput - def generateInput(self,model,oldInput): + def generateInput(self, model, modelInput): """ This method has to be overwritten to provide the specialization for the specific sampler The model instance in might be needed since, especially for external codes, @@ -791,24 +795,27 @@ def generateInput(self,model,oldInput): @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. etc) @ Out, generateInput, (0,list), list containing the new inputs -in reality it is the model that returns this; the Sampler generates the value to be placed in the input of the model. 
""" - #NB: if someday the DET handles restarts as other samplers do in generateInput, the return code 1 indicates the result - # is stored in a restart data object, while 0 indicates a new run has been found. - #model.getAdditionalInputEdits(self.inputInfo) - return 0, self.localGenerateInput(model, oldInput) + rlzBatch = RealizationBatch(self.getBatchSize()) + # for now, we take one realization at a time, until such time as DET is reworked to + # provide batches of realizations. + rlz = rlzBatch[0] + modelInput = self.localGenerateInput(rlz, model, modelInput) + return rlz - def localGenerateInput(self,model,myInput): + def localGenerateInput(self, rlz, model, modelInput): """ Function to select the next most informative point for refining the limit surface search. - After this method is called, the self.inputInfo should be ready to be sent + After this method is called, the rlz.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, model instance, an instance of a model - @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) + @ In, modelInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) @ Out, newerInput, list, list of new inputs """ if self.counter <= 1: # If first branch input, create the queue - self._createRunningQueue(model, myInput) + self._createRunningQueue(rlz, model, modelInput) # retrieve the input from the queue newerInput = self.__getQueueElement() # If no inputs are present in the queue => a branch is finished @@ -816,10 +823,9 @@ def localGenerateInput(self,model,myInput): self.raiseADebug('A Branch ended!') ## It turns out the "newerInput" contains all of the information that should - ## be in inputInfo (which should actually be returned and not stored in the - ## sampler object, but all samplers do this for now) -- DPM 4/26/17 - self.inputInfo = newerInput['kwargs'] - return myInput + ## be in inputInfo -- DPM 4/26/17 + rlz.inputInfo = newerInput['kwargs'] + return modelInput def _generateDistributions(self,availableDist,availableFunc): """ diff --git a/ravenframework/Samplers/EnsembleForward.py b/ravenframework/Samplers/EnsembleForward.py index ced8046ee0..81e3b82f07 100644 --- a/ravenframework/Samplers/EnsembleForward.py +++ b/ravenframework/Samplers/EnsembleForward.py @@ -221,14 +221,15 @@ def localInitialize(self): # add meta data keys self.addMetaKeys(metadataKeys, params=metaParams) - def localGenerateInput(self, model, myInput): + def localGenerateInput(self, rlz, model, modelInput): """ Function to select the next most informative point for refining the limit surface search. - After this method is called, the self.inputInfo should be ready to be sent + After this method is called, the rlz.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, model instance, an instance of a model - @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) + @ In, modelInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) 
@ Out, None """ index = self.gridEnsemble.returnPointAndAdvanceIterator(returnDict = True) @@ -237,20 +238,20 @@ def localGenerateInput(self, model, myInput): coordinate.append(self.samplersCombinations[samplingStrategy][int(index[samplingStrategy])]) for combination in coordinate: for key in combination: - if key not in self.inputInfo: - self.inputInfo[key] = combination[key] + if key not in rlz.inputInfo: + rlz.inputInfo[key] = combination[key] else: - if type(self.inputInfo[key]).__name__ == 'dict': - self.inputInfo[key].update(combination[key]) - self.inputInfo['PointProbability'] = reduce(mul, self.inputInfo['SampledVarsPb'].values()) - self.inputInfo['ProbabilityWeight' ] = 1.0 - for key in self.inputInfo: + if type(rlz.inputInfo[key]).__name__ == 'dict': + rlz.inputInfo[key].update(combination[key]) + rlz.inputInfo['PointProbability'] = reduce(mul, rlz.inputInfo['SampledVarsPb'].values()) + rlz.inputInfo['ProbabilityWeight' ] = 1.0 + for key in rlz.inputInfo: if key.startswith('ProbabilityWeight-'): - self.inputInfo['ProbabilityWeight' ] *= self.inputInfo[key] - self.inputInfo['SamplerType'] = 'EnsembleForward' + rlz.inputInfo['ProbabilityWeight' ] *= rlz.inputInfo[key] + rlz.inputInfo['SamplerType'] = 'EnsembleForward' # Update dependent variables - self._functionalVariables() + self._functionalVariables(rlz) # FIXME does this want batch or single? def flush(self): """ diff --git a/ravenframework/Samplers/FactorialDesign.py b/ravenframework/Samplers/FactorialDesign.py index 9a1d649af3..9bfea62082 100644 --- a/ravenframework/Samplers/FactorialDesign.py +++ b/ravenframework/Samplers/FactorialDesign.py @@ -18,19 +18,9 @@ @author: alfoa supercedes Samplers.py from alfoa """ -#for future compatibility with Python 3-------------------------------------------------------------- -from __future__ import division, print_function, unicode_literals, absolute_import -#End compatibility block for Python 3---------------------------------------------------------------- - -#External Modules------------------------------------------------------------------------------------ -import sys -#External Modules End-------------------------------------------------------------------------------- - -#Internal Modules------------------------------------------------------------------------------------ from .Grid import Grid import pyDOE3 as doe from ..utils import InputData, InputTypes -#Internal Modules End-------------------------------------------------------------------------------- class FactorialDesign(Grid): """ @@ -159,20 +149,21 @@ def localInitialize(self): self.designMatrix[self.designMatrix == -1] = 0 # convert all -1 in 0 => we can access to the grid info directly self.limit = self.designMatrix.shape[0] # the limit is the number of rows - def localGenerateInput(self,model,myInput): + def localGenerateInput(self, rlz, model, myInput): """ Function to select the next most informative point for refining the limit surface search. After this method is called, the self.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, model instance, an instance of a model @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) 
@ Out, None """ if self.factOpt['algorithmType'] == 'full': - Grid.localGenerateInput(self,model, myInput) + Grid.localGenerateInput(self, rlz, model, myInput) else: self.gridCoordinate = self.designMatrix[self.counter - 1][:].tolist() - Grid.localGenerateInput(self,model, myInput) + Grid.localGenerateInput(self, rlz, model, myInput) # # diff --git a/ravenframework/Samplers/LimitSurfaceSearch.py b/ravenframework/Samplers/LimitSurfaceSearch.py index d973bd00d4..6f1d032934 100644 --- a/ravenframework/Samplers/LimitSurfaceSearch.py +++ b/ravenframework/Samplers/LimitSurfaceSearch.py @@ -169,6 +169,7 @@ def __init__(self): self.acceptedScoringParam = ['distance','distancePersistence'] self.acceptedBatchParam = ['none','naive','maxV','maxP'] + self.scores = None self.addAssemblerObject('ROM', InputData.Quantity.one_to_infinity) self.addAssemblerObject('Function', InputData.Quantity.zero_to_infinity) @@ -562,17 +563,16 @@ def localStillReady(self,ready): self.raiseAMessage(self.name + " converged!") return ready - def __scoreCandidates(self): + def __scoreCandidates(self, rlz): """ Compute the scores of the 'candidate set' which should be the currently extracted limit surface. - @ In, None + @ In, rlz, Realization, dict-like object to fill with sample @ Out, None """ # DM: This sequence gets used repetitively, so I am promoting it to its own # variable axisNames = [key.replace('','') for key in self.axisName] - matrixShape = self.limitSurfacePP.getTestMatrix().shape self.scores = OrderedDict() if self.scoringMethod.startswith('distance'): sampledMatrix = np.zeros((len(self.limitSurfacePP.getFunctionValue()[axisNames[0]])+len(self.hangingPoints[:,0]),len(self.axisName))) @@ -582,10 +582,10 @@ def __scoreCandidates(self): # The hanging point are added to the list of the already explored points # so as not to pick the same when in parallel for varIndex, _ in enumerate(axisNames): - self.inputInfo['distributionName'][self.axisName[varIndex]] = self.toBeSampled[self.axisName[varIndex]] - self.inputInfo['distributionType'][self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].type + rlz.inputInfo['distributionName'][self.axisName[varIndex]] = self.toBeSampled[self.axisName[varIndex]] + rlz.inputInfo['distributionType'][self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].type - for key, value in self.invPointPersistence.items(): + for key in self.invPointPersistence: if key != self.exceptionGrid and self.surfPoint[key] is not None: distance, _ = distanceTree.query(self.surfPoint[key]) # Different versions of scipy/numpy will yield different results on @@ -612,12 +612,13 @@ def __scoreCandidates(self): else: self.raiseAnError(NotImplementedError,self.scoringMethod + ' scoring method is not implemented yet') - def localGenerateInput(self,model,oldInput): + def localGenerateInput(self, rlz, model, oldInput): """ Function to select the next most informative point for refining the limit surface search. - After this method is called, the self.inputInfo should be ready to be sent + After this method is called, the rlz.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, model instance, an instance of a model @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) 
@ Out, None @@ -628,8 +629,8 @@ def localGenerateInput(self,model,oldInput): # derivative probability is the lowest # create values dictionary - self.inputInfo['distributionName'] = {} #Used to determine which distribution to change if needed. - self.inputInfo['distributionType'] = {} #Used to determine which distribution type is used + rlz.inputInfo['distributionName'] = {} #Used to determine which distribution to change if needed. + rlz.inputInfo['distributionType'] = {} #Used to determine which distribution type is used self.raiseADebug('generating input') varSet=False @@ -639,7 +640,7 @@ def localGenerateInput(self,model,oldInput): if self.surfPoint is not None and len(self.surfPoint) > 0: if self.batchStrategy == 'none': - self.__scoreCandidates() + self.__scoreCandidates(rlz) maxDistance, maxGridId, maxId = 0.0, "", 0 for key, value in sorted(self.invPointPersistence.items()): if key != self.exceptionGrid and self.surfPoint[key] is not None: @@ -648,9 +649,9 @@ def localGenerateInput(self,model,oldInput): maxDistance, maxGridId, maxId = localMax, key, np.argmax(self.scores[key]) if maxDistance > 0.0: for varIndex, _ in enumerate([key.replace('','') for key in self.axisName]): - self.values[self.axisName[varIndex]] = copy.copy(float(self.surfPoint[maxGridId][maxId,varIndex])) - self.inputInfo['SampledVarsPb'][self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(self.values[self.axisName[varIndex]]) - self.inputInfo['ProbabilityWeight-'+self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(self.values[self.axisName[varIndex]]) + rlz[self.axisName[varIndex]] = copy.copy(float(self.surfPoint[maxGridId][maxId,varIndex])) + rlz.inputInfo['SampledVarsPb'][self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(rlz[self.axisName[varIndex]]) + rlz.inputInfo['ProbabilityWeight-'+self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(rlz[self.axisName[varIndex]]) varSet=True else: self.raiseADebug('Maximum score is 0.0') @@ -659,7 +660,7 @@ def localGenerateInput(self,model,oldInput): ## Initialize the queue with as many points as requested or as many as ## possible if len(self.toProcess) == 0: - self.__scoreCandidates() + self.__scoreCandidates(rlz) edges = [] flattenedSurfPoints = list() @@ -713,16 +714,16 @@ def localGenerateInput(self,model,oldInput): ## Select one sample selectedPoint = self.toProcess.pop() for varIndex, varName in enumerate(axisNames): - self.values[self.axisName[varIndex]] = float(selectedPoint[varIndex]) - self.inputInfo['SampledVarsPb'][self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(self.values[self.axisName[varIndex]]) - self.inputInfo['ProbabilityWeight-'+self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(self.values[self.axisName[varIndex]]) + rlz[self.axisName[varIndex]] = float(selectedPoint[varIndex]) + rlz.inputInfo['SampledVarsPb'][self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(rlz[self.axisName[varIndex]]) + rlz.inputInfo['ProbabilityWeight-'+self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(rlz[self.axisName[varIndex]]) varSet=True elif self.batchStrategy == 'naive': ######################################################################## ## Initialize the queue with as many points as requested or as many as ## possible if len(self.toProcess) == 0: - self.__scoreCandidates() + self.__scoreCandidates(rlz) sortedIndices = sorted(range(len(self.scores)), key=lambda k: self.scores[k],reverse=True) B = 
min(self.maxBatchSize,len(sortedIndices)) for idx in sortedIndices[0:B]: @@ -733,30 +734,30 @@ def localGenerateInput(self,model,oldInput): ## Select one sample selectedPoint = self.toProcess.pop() for varIndex, varName in enumerate(axisNames): - self.values[self.axisName[varIndex]] = float(selectedPoint[varIndex]) - self.inputInfo['SampledVarsPb'][self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(self.values[self.axisName[varIndex]]) - self.inputInfo['ProbabilityWeight-'+self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(self.values[self.axisName[varIndex]]) + rlz[self.axisName[varIndex]] = float(selectedPoint[varIndex]) + rlz.inputInfo['SampledVarsPb'][self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(rlz[self.axisName[varIndex]]) + rlz.inputInfo['ProbabilityWeight-'+self.axisName[varIndex]] = self.distDict[self.axisName[varIndex]].pdf(rlz[self.axisName[varIndex]]) varSet=True if not varSet: #here we are still generating the batch for key in sorted(self.distDict.keys()): if self.toleranceWeight=='cdf': - self.values[key] = self.distDict[key].ppf(float(randomUtils.random())) + rlz[key] = self.distDict[key].ppf(float(randomUtils.random())) else: - self.values[key] = self.distDict[key].lowerBound+(self.distDict[key].upperBound-self.distDict[key].lowerBound)*float(randomUtils.random()) - self.inputInfo['distributionName'][key] = self.toBeSampled[key] - self.inputInfo['distributionType'][key] = self.distDict[key].type - self.inputInfo['SampledVarsPb' ][key] = self.distDict[key].pdf(self.values[key]) - self.inputInfo['ProbabilityWeight-'+key] = self.distDict[key].pdf(self.values[key]) + rlz[key] = self.distDict[key].lowerBound+(self.distDict[key].upperBound-self.distDict[key].lowerBound)*float(randomUtils.random()) + rlz.inputInfo['distributionName'][key] = self.toBeSampled[key] + rlz.inputInfo['distributionType'][key] = self.distDict[key].type + rlz.inputInfo['SampledVarsPb'][key] = self.distDict[key].pdf(rlz[key]) + rlz.inputInfo['ProbabilityWeight-'+key] = self.distDict[key].pdf(rlz[key]) self.addMetaKeys(['ProbabilityWeight-'+key]) - self.inputInfo['PointProbability' ] = reduce(mul, self.inputInfo['SampledVarsPb'].values()) + rlz.inputInfo['PointProbability'] = reduce(mul, rlz.inputInfo['SampledVarsPb'].values()) # the probability weight here is not used, the post processor is going to recreate the grid associated and use a ROM for the probability evaluation - self.inputInfo['ProbabilityWeight'] = self.inputInfo['PointProbability'] - self.hangingPoints = np.vstack((self.hangingPoints,copy.copy(np.array([self.values[axis] for axis in self.axisName])))) + rlz.inputInfo['ProbabilityWeight'] = rlz.inputInfo['PointProbability'] + self.hangingPoints = np.vstack((self.hangingPoints,copy.copy(np.array([rlz[axis] for axis in self.axisName])))) self.raiseADebug('At counter '+str(self.counter)+' the generated sampled variables are: '+str(self.values)) - self.inputInfo['SamplerType'] = 'LimitSurfaceSearch' - self.inputInfo['subGridTol' ] = self.subGridTol + rlz.inputInfo['SamplerType'] = 'LimitSurfaceSearch' + rlz.inputInfo['subGridTol' ] = self.subGridTol # This is the normal derivation to be used later on # pbMapPointCoord = np.zeros((len(self.surfPoint),self.nVar*2+1,self.nVar)) @@ -815,7 +816,7 @@ def localGenerateInput(self,model,oldInput): # minIndex = np.argmin(np.abs(modGrad)) # pdDist = self.sign*(pbPoint[minIndex,2*self.nVar][0]-0.5-10*self.tolerance)/modGrad[minIndex] # for varIndex, varName in enumerate([key.replace('','') for key 
in self.axisName]): - # self.values[varName] = copy.copy(float(pbMapPointCoord[minIndex,2*self.nVar,varIndex]+pdDist*gradVect[minIndex,varIndex])) + # rlz[varName] = copy.copy(float(pbMapPointCoord[minIndex,2*self.nVar,varIndex]+pdDist*gradVect[minIndex,varIndex])) # gradVect = np.ndarray(self.nVar) # centraPb = pbPoint[minIndex,2*self.nVar] # centralCoor = pbMapPointCoord[minIndex,2*self.nVar,:] @@ -827,7 +828,7 @@ def localGenerateInput(self,model,oldInput): # gradVect = gradVect*pdDist # gradVect = gradVect+centralCoor # for varIndex, varName in enumerate([key.replace('','') for key in self.axisName]): - # self.values[varName] = copy.copy(float(gradVect[varIndex])) + # rlz[varName] = copy.copy(float(gradVect[varIndex])) def _formatSolutionExportVariableNames(self, acceptable): """ diff --git a/ravenframework/Samplers/ResponseSurfaceDesign.py b/ravenframework/Samplers/ResponseSurfaceDesign.py index d238a50377..ff1917344e 100644 --- a/ravenframework/Samplers/ResponseSurfaceDesign.py +++ b/ravenframework/Samplers/ResponseSurfaceDesign.py @@ -183,12 +183,13 @@ def localInitialize(self): Grid.localInitialize(self) self.limit = self.designMatrix.shape[0] - def localGenerateInput(self,model,myInput): + def localGenerateInput(self, rlz, model, myInput): """ Function to select the next most informative point for refining the limit surface search. - After this method is called, the self.inputInfo should be ready to be sent + After this method is called, the rlz.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, model instance, an instance of a model @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) @ Out, None @@ -196,4 +197,4 @@ def localGenerateInput(self,model,myInput): gridcoordinate = self.designMatrix[self.counter - 1][:].tolist() for cnt, varName in enumerate(self.axisName): self.gridCoordinate[cnt] = self.mapping[varName].index(gridcoordinate[cnt]) - Grid.localGenerateInput(self,model, myInput) + Grid.localGenerateInput(self, rlz, model, myInput) diff --git a/ravenframework/Samplers/Sampler.py b/ravenframework/Samplers/Sampler.py index ae10908530..daecac6f0f 100644 --- a/ravenframework/Samplers/Sampler.py +++ b/ravenframework/Samplers/Sampler.py @@ -205,7 +205,7 @@ def __init__(self): """ super().__init__() ### COUNTERS AND FLAGS ### - self.batch = 1 # determines the size of each sampling batch to run + self.batch = 0 # determines the size of each sampling batch to run self.counter = 0 # Counter of the samples performed (better the input generated!!!). It is reset by calling the function self.initialize self.auxcnt = 0 # Aux counter of samples performed (for its usage check initialize method) self.limit = sys.maxsize # maximum number of Samples (for example, Monte Carlo = Number of HistorySet to run, DET = Unlimited) @@ -268,7 +268,6 @@ def __init__(self): self.addAssemblerObject('Restart', InputData.Quantity.zero_to_infinity) self.addAssemblerObject('ConstantSource', InputData.Quantity.zero_to_infinity) - def _generateDistributions(self, availableDist, availableFunc): """ Generates the distributions and functions. 
@@ -702,7 +701,7 @@ def getBatchSize(self): @ In, None @ Out, size, int, 0 """ - return 0 + return self.batch def localGetInitParams(self): """ @@ -1074,8 +1073,7 @@ def generateInput(self, model, modelInput): only the code interface possesses the dictionary for reading the variable definition syntax @ In, model, model instance, it is the instance of a RAVEN model @ In, modelInput, list, a list of the original Step inputs for the model (e.g. files) - @ Out, found, int, number indicating the result of sampling this variable (e.g., 0 new sample, 1 from restart) - @ Out, rlz, Realization, mapping from variables to values for sample + @ Out, rlzBatch, RealizationBatch, list of mappings from variables to values for sample @ Out, modelInput, potentially perturbed? original inputs for model, or None if taken from restart """ if model is not None: @@ -1117,7 +1115,7 @@ def generateInput(self, model, modelInput): _, inExisting = self._checkRestartForEvaluation(rlz) if inExisting is None: # we have a new evaluation, so check its contents for consistency - self._checkSample() + self._checkSample(rlz) self.raiseADebug(f' ... Batch Sample point {r}, prefix {rlz.inputInfo["prefix"]}, (var, val):') for var, val in rlz.items(): self.raiseADebug(f' ... - "{var}": "{val}"') @@ -1202,9 +1200,10 @@ def pcaTransform(self, rlz, varsDict, dist): rlz.update(manifestVariablesDict) # TODO REMOVE_applyTransformation(rlz) - def _checkSample(self): + def _checkSample(self, rlz): """ Checks the current sample for consistency with expected contents. + @ In, rlz, Realization, dict-like object to fill with sample @ In, None @ Out, None """ diff --git a/ravenframework/Samplers/Sobol.py b/ravenframework/Samplers/Sobol.py index 8429c5380f..ba6b869be3 100644 --- a/ravenframework/Samplers/Sobol.py +++ b/ravenframework/Samplers/Sobol.py @@ -148,8 +148,8 @@ def localInitialize(self): # just for each combo SG = rom.sparseGrid #they all should have the same sparseGrid SG._remap(combo) - for l in range(len(SG)): - pt, _ = SG[l] + for gridpt in SG: + pt, _ = gridpt newpt = np.zeros(len(self.features)) for v, var in enumerate(self.features): if var in combo: @@ -172,25 +172,26 @@ def localInitialize(self): #for target in self.targets: self.ROM.supervisedContainer[0].initialize(initdict) - def localGenerateInput(self, model, myInput): + def localGenerateInput(self, rlz, model, myInput): """ Function to select the next most informative point + @ In, rlz, Realization, dict-like mapping of vars to vals @ In, model, model instance, an instance of a model @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) @ Out, None """ try: pt = self.pointsToRun[self.counter-1] - except IndexError: + except IndexError as ie: self.raiseADebug('All sparse grids are complete! 
Moving on...') - raise utils.NoMoreSamplesNeeded + raise utils.NoMoreSamplesNeeded from ie for v, varName in enumerate(self.features): # compute the SampledVarsPb for 1-D distribution if self.variables2distributionsMapping[varName]['totDim'] == 1: for key in varName.strip().split(','): - self.values[key] = pt[v] - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(pt[v]) - self.inputInfo['ProbabilityWeight-'+varName] = self.inputInfo['SampledVarsPb'][varName] + rlz[key] = pt[v] + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(pt[v]) + rlz.inputInfo['ProbabilityWeight-'+varName] = rlz.inputInfo['SampledVarsPb'][varName] # compute the SampledVarsPb for N-D distribution elif self.variables2distributionsMapping[varName]['totDim'] > 1 and self.variables2distributionsMapping[varName]['reducedDim'] == 1: dist = self.variables2distributionsMapping[varName]['name'] @@ -209,12 +210,12 @@ def localGenerateInput(self, model, myInput): else: self.raiseAnError(IOError, f'The variables {var} listed in sobol sampler, but not used in the ROM!' ) for key in var.strip().split(','): - self.values[key] = pt[location] - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinates) - self.inputInfo['ProbabilityWeight-'+dist] = self.inputInfo['SampledVarsPb'][varName] - self.inputInfo['PointProbability'] = reduce(mul,self.inputInfo['SampledVarsPb'].values()) - self.inputInfo['ProbabilityWeight'] = np.atleast_1d(1.0) # weight has no meaning for sobol - self.inputInfo['SamplerType'] = 'Sparse Grids for Sobol' + rlz[key] = pt[location] + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinates) + rlz.inputInfo['ProbabilityWeight-'+dist] = rlz.inputInfo['SampledVarsPb'][varName] + rlz.inputInfo['PointProbability'] = reduce(mul, rlz.inputInfo['SampledVarsPb'].values()) + rlz.inputInfo['ProbabilityWeight'] = np.atleast_1d(1.0) # weight has no meaning for sobol + rlz.inputInfo['SamplerType'] = 'Sparse Grids for Sobol' def flush(self): """ diff --git a/ravenframework/Samplers/SparseGridCollocation.py b/ravenframework/Samplers/SparseGridCollocation.py index c0f4575d83..eb9c4fb770 100644 --- a/ravenframework/Samplers/SparseGridCollocation.py +++ b/ravenframework/Samplers/SparseGridCollocation.py @@ -267,28 +267,29 @@ def _generateQuadsAndPolys(self,SVL): self.importanceDict[varName] = float(dat['weight']) - def localGenerateInput(self, model, oldInput): + def localGenerateInput(self, rlz, model, oldInput): """ Function to select the next most informative point for refining the limit surface search. After this method is called, the self.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, model instance, an instance of a model @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) 
@ Out, None """ try: - pt,weight = self.sparseGrid[self.counter-1] - except IndexError: - raise utils.NoMoreSamplesNeeded + pt, weight = self.sparseGrid[self.counter-1] + except IndexError as ie: + raise utils.NoMoreSamplesNeeded from ie for v, varName in enumerate(self.sparseGrid.varNames): # compute the SampledVarsPb for 1-D distribution if self.variables2distributionsMapping[varName]['totDim'] == 1: for key in varName.strip().split(','): - self.values[key] = pt[v] - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(pt[v]) - self.inputInfo['ProbabilityWeight-'+varName] = self.inputInfo['SampledVarsPb'][varName] + rlz[key] = pt[v] + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(pt[v]) + rlz.inputInfo['ProbabilityWeight-'+varName] = rlz.inputInfo['SampledVarsPb'][varName] # compute the SampledVarsPb for N-D distribution # Assume only one N-D distribution is associated with sparse grid collocation method elif self.variables2distributionsMapping[varName]['totDim'] > 1 and self.variables2distributionsMapping[varName]['reducedDim'] ==1: @@ -308,13 +309,13 @@ def localGenerateInput(self, model, oldInput): else: self.raiseAnError(IOError, f'The variables {var} listed in sparse grid collocation sampler, but not used in the ROM!' ) for key in var.strip().split(','): - self.values[key] = pt[location] - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinates) - self.inputInfo['ProbabilityWeight-'+dist] = self.inputInfo['SampledVarsPb'][varName] + rlz[key] = pt[location] + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinates) + rlz.inputInfo['ProbabilityWeight-'+dist] = rlz.inputInfo['SampledVarsPb'][varName] - self.inputInfo['ProbabilityWeight'] = weight - self.inputInfo['PointProbability'] = reduce(mul,self.inputInfo['SampledVarsPb'].values()) - self.inputInfo['SamplerType'] = 'Sparse Grid Collocation' + rlz.inputInfo['ProbabilityWeight'] = weight + rlz.inputInfo['PointProbability'] = reduce(mul, rlz.inputInfo['SampledVarsPb'].values()) + rlz.inputInfo['SamplerType'] = 'Sparse Grid Collocation' def readFromROM(self): """ diff --git a/ravenframework/Samplers/Stratified.py b/ravenframework/Samplers/Stratified.py index 6b11702f67..dbd6a46c71 100644 --- a/ravenframework/Samplers/Stratified.py +++ b/ravenframework/Samplers/Stratified.py @@ -103,8 +103,8 @@ def localInputAndChecks(self,xmlNode, paramInput): else: # correct dimensionality given self.pointByVar = pointByVar[0] - self.inputInfo['upper'] = {} - self.inputInfo['lower'] = {} + rlz.inputInfo['upper'] = {} + rlz.inputInfo['lower'] = {} def localInitialize(self): """ @@ -155,19 +155,20 @@ def localInitialize(self): for nPoint in range(self.pointByVar-1): self.sampledCoordinate[nPoint] = [tempFillingCheck[mappingIdVarName[varName]][nPoint] for varName in self.axisName] - def localGenerateInput(self, model, oldInput): + def localGenerateInput(self, rlz, model, oldInput): """ Function to select the next most informative point for refining the limit surface search. - After this method is called, the self.inputInfo should be ready to be sent + After this method is called, the rlz.inputInfo should be ready to be sent to the model + @ In, rlz, Realization, dict-like object to fill with sample @ In, model, model instance, an instance of a model @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) 
@ Out, None """ varCount = 0 - self.inputInfo['distributionName'] = {} # Used to determine which distribution to change if needed. - self.inputInfo['distributionType'] = {} # Used to determine which distribution type is used + rlz.inputInfo['distributionName'] = {} # Used to determine which distribution to change if needed. + rlz.inputInfo['distributionType'] = {} # Used to determine which distribution type is used weight = 1.0 for varName in self.axisName: # new implementation for ND LHS @@ -178,8 +179,8 @@ def localGenerateInput(self, model, oldInput): if self.variablesTransformationDict: for distVarName in self.distributions2variablesMapping[distName]: for subVar in utils.first(distVarName.keys()).strip().split(','): - self.inputInfo['distributionName'][subVar] = self.toBeSampled[varName] - self.inputInfo['distributionType'][subVar] = self.distDict[varName].type + rlz.inputInfo['distributionName'][subVar] = self.toBeSampled[varName] + rlz.inputInfo['distributionType'][subVar] = self.distDict[varName].type ndCoordinate = np.zeros(len(self.distributions2variablesMapping[distName])) dxs = np.zeros(len(self.distributions2variablesMapping[distName])) centerCoordinate = np.zeros(len(self.distributions2variablesMapping[distName])) @@ -199,9 +200,9 @@ def localGenerateInput(self, model, oldInput): dxs[positionList.index(position)] = self.distDict[variable].inverseMarginalDistribution(max(upper,lower),variable)-self.distDict[variable].inverseMarginalDistribution(min(upper,lower),variable) centerCoordinate[positionList.index(position)] = (self.distDict[variable].inverseMarginalDistribution(upper,variable)+self.distDict[variable].inverseMarginalDistribution(lower,variable))/2.0 for subVar in variable.strip().split(','): - self.values[subVar] = ndCoordinate[positionList.index(position)] - self.inputInfo['upper'][subVar] = self.distDict[variable].inverseMarginalDistribution(max(upper,lower),variable) - self.inputInfo['lower'][subVar] = self.distDict[variable].inverseMarginalDistribution(min(upper,lower),variable) + rlz[subVar] = ndCoordinate[positionList.index(position)] + rlz.inputInfo['upper'][subVar] = self.distDict[variable].inverseMarginalDistribution(max(upper,lower),variable) + rlz.inputInfo['lower'][subVar] = self.distDict[variable].inverseMarginalDistribution(min(upper,lower),variable) elif self.gridInfo[variable] == 'value': dxs[positionList.index(position)] = max(upper,lower) - min(upper,lower) centerCoordinate[positionList.index(position)] = (upper + lower)/2.0 @@ -209,11 +210,11 @@ def localGenerateInput(self, model, oldInput): coordinate = self.distDict[variable].inverseMarginalDistribution(coordinateCdf,variable) ndCoordinate[positionList.index(position)] = coordinate for subVar in variable.strip().split(','): - self.values[subVar] = coordinate - self.inputInfo['upper'][subVar] = max(upper,lower) - self.inputInfo['lower'][subVar] = min(upper,lower) + rlz[subVar] = coordinate + rlz.inputInfo['upper'][subVar] = max(upper,lower) + rlz.inputInfo['lower'][subVar] = min(upper,lower) gridsWeight = self.distDict[varName].cellIntegral(centerCoordinate,dxs) - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinate) + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinate) else: if self.gridInfo[varName] == 'CDF': upper = self.gridEntity.returnShiftedCoordinate(self.gridEntity.returnIteratorIndexes(),{varName:self.sampledCoordinate[self.counter-1][varCount]+1})[varName] @@ -223,14 +224,14 @@ def localGenerateInput(self, model, oldInput): 
gridCoordinate, distName = self.distDict[varName].ppf(coordinate), self.variables2distributionsMapping[varName]['name'] for distVarName in self.distributions2variablesMapping[distName]: for subVar in utils.first(distVarName.keys()).strip().split(','): - self.inputInfo['distributionName'][subVar], self.inputInfo['distributionType'][subVar], self.values[subVar] = self.toBeSampled[varName], self.distDict[varName].type, np.atleast_1d(gridCoordinate)[utils.first(distVarName.values())-1] + rlz.inputInfo['distributionName'][subVar], rlz.inputInfo['distributionType'][subVar], rlz[subVar] = self.toBeSampled[varName], self.distDict[varName].type, np.atleast_1d(gridCoordinate)[utils.first(distVarName.values())-1] # coordinate stores the cdf values, we need to compute the pdf for SampledVarsPb - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(np.atleast_1d(gridCoordinate).tolist()) + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(np.atleast_1d(gridCoordinate).tolist()) gridsWeight = max(upper,lower) - min(upper,lower) else: self.raiseAnError(IOError,"Since the globalGrid is defined, the Stratified Sampler is only working when the sampling is performed on a grid on a CDF. However, the user specifies the grid on " + self.gridInfo[varName]) weight *= gridsWeight - self.inputInfo['ProbabilityWeight-'+distName] = gridsWeight + rlz.inputInfo['ProbabilityWeight-'+distName] = gridsWeight if ("" in varName) or self.variables2distributionsMapping[varName]['totDim']==1: # 1D variable # if the varName is a comma separated list of strings the user wants to sample the comma separated variables with the same sampled value => link the value to all comma separated variables @@ -243,32 +244,32 @@ def localGenerateInput(self, model, oldInput): ppfLower = self.distDict[varName].ppf(min(upper,lower)) ppfUpper = self.distDict[varName].ppf(max(upper,lower)) gridWeight = self.distDict[varName].cdf(ppfUpper) - self.distDict[varName].cdf(ppfLower) - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ppfValue) + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ppfValue) elif self.gridInfo[varName] == 'value': coordinateCdf = self.distDict[varName].cdf(min(upper,lower)) + (self.distDict[varName].cdf(max(upper,lower))-self.distDict[varName].cdf(min(upper,lower)))*randomUtils.random() if coordinateCdf == 0.0: self.raiseAWarning(IOError, "The grid lower bound and upper bound in value will generate ZERO cdf value!!!") coordinate = self.distDict[varName].ppf(coordinateCdf) gridWeight = self.distDict[varName].cdf(max(upper,lower)) - self.distDict[varName].cdf(min(upper,lower)) - self.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(coordinate) + rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(coordinate) # compute the weight and ProbabilityWeight-varName weight *= gridWeight - self.inputInfo['ProbabilityWeight-'+varName] = gridWeight + rlz.inputInfo['ProbabilityWeight-'+varName] = gridWeight for subVar in varName.strip().split(','): - self.inputInfo['distributionName'][subVar] = self.toBeSampled[varName] - self.inputInfo['distributionType'][subVar] = self.distDict[varName].type + rlz.inputInfo['distributionName'][subVar] = self.toBeSampled[varName] + rlz.inputInfo['distributionType'][subVar] = self.distDict[varName].type if self.gridInfo[varName] =='CDF': - self.values[subVar] = ppfValue - self.inputInfo['upper'][subVar] = ppfUpper - self.inputInfo['lower'][subVar] = ppfLower + rlz[subVar] = ppfValue + 
rlz.inputInfo['upper'][subVar] = ppfUpper + rlz.inputInfo['lower'][subVar] = ppfLower elif self.gridInfo[varName] =='value': - self.values[subVar] = coordinate - self.inputInfo['upper'][subVar] = max(upper,lower) - self.inputInfo['lower'][subVar] = min(upper,lower) + rlz[subVar] = coordinate + rlz.inputInfo['upper'][subVar] = max(upper,lower) + rlz.inputInfo['lower'][subVar] = min(upper,lower) - self.inputInfo['PointProbability'] = reduce(mul, self.inputInfo['SampledVarsPb'].values()) - self.inputInfo['ProbabilityWeight' ] = weight - self.inputInfo['SamplerType'] = 'Stratified' + rlz.inputInfo['PointProbability'] = reduce(mul, rlz.inputInfo['SampledVarsPb'].values()) + rlz.inputInfo['ProbabilityWeight' ] = weight + rlz.inputInfo['SamplerType'] = 'Stratified' def flush(self): """ From 6c521a110003e793e9086eed885f30cc8c7d4d25 Mon Sep 17 00:00:00 2001 From: talbpw Date: Wed, 6 Nov 2024 11:21:29 -0700 Subject: [PATCH 08/18] ROM model working --- ravenframework/Models/Dummy.py | 6 ++-- ravenframework/Models/ExternalModel.py | 4 +-- ravenframework/Models/ROM.py | 31 ++++++++++--------- ravenframework/Samplers/LimitSurfaceSearch.py | 2 +- ravenframework/Samplers/Stratified.py | 4 +-- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/ravenframework/Models/Dummy.py b/ravenframework/Models/Dummy.py index 8795fa3d9a..a748548a3e 100644 --- a/ravenframework/Models/Dummy.py +++ b/ravenframework/Models/Dummy.py @@ -173,7 +173,7 @@ def createNewInput(self, myInput, samplerType, rlz): kwargs['SampledVars'] = sampledVars except KeyError: pass - return [(inputDict)],copy.deepcopy(rlz) + return [(inputDict)], copy.deepcopy(rlz) @Parallel() def evaluateSample(self, myInput, samplerType, rlz): @@ -192,9 +192,9 @@ def evaluateSample(self, myInput, samplerType, rlz): inRun = self._manipulateInput(Input[0]) # alias system self._replaceVariablesNamesWithAliasSystem(inRun,'input',True) - self._replaceVariablesNamesWithAliasSystem(kwargs['SampledVars'],'input',True) + self._replaceVariablesNamesWithAliasSystem(rlz,'input',True) # build realization using input space from inRun and metadata from kwargs - rlz = dict((var,np.atleast_1d(inRun[var] if var in kwargs['SampledVars'] else kwargs[var])) for var in set(itertools.chain(kwargs.keys(),inRun.keys()))) + rlz = dict((var,np.atleast_1d(inRun[var] if var in rlz else rlz.inputInfo)) for var in set(itertools.chain(rlz.keys(),inRun.keys()))) # add dummy output space rlz['OutputPlaceHolder'] = np.atleast_1d(float(Input[1]['prefix'])) return rlz diff --git a/ravenframework/Models/ExternalModel.py b/ravenframework/Models/ExternalModel.py index fb8c993c6f..2149c88ad5 100644 --- a/ravenframework/Models/ExternalModel.py +++ b/ravenframework/Models/ExternalModel.py @@ -119,7 +119,7 @@ def initialize(self,runInfo,inputs,initDict=None): self.sim.initialize(self.initExtSelf,runInfo,inputs) Dummy.initialize(self, runInfo, inputs) - def createNewInput(self,myInput, samplerType, rlz): + def createNewInput(self, myInput, samplerType, rlz): """ This function will return a new input to be submitted to the model, it is called by the sampler. 
@ In, myInput, list, the inputs (list) to start from to generate the new one @@ -330,7 +330,7 @@ def evaluateSample(self, myInput, samplerType, rlz): ## then result, which has the expected outputs and possibly changed inputs res.update(dict((var, np.atleast_1d(val)) for var, val in result.items())) ## then get the metadata, values from the input realization - rlzData = rlz.asDict() + #rlzData = rlz.asDict() res.update(dict((var, np.atleast_1d(val)) for var, val in rlz.inputInfo.items())) ## then get the inputs from SampledVars (overwriting any other entries) res.update(dict((var, np.atleast_1d(val)) for var, val in rlz.items())) diff --git a/ravenframework/Models/ROM.py b/ravenframework/Models/ROM.py index a10f422961..bb9a101483 100644 --- a/ravenframework/Models/ROM.py +++ b/ravenframework/Models/ROM.py @@ -192,7 +192,7 @@ def applyRunInfo(self, runInfo): """ self.numThreads = runInfo.get('NumThreads', 1) - def createNewInput(self,myInput,samplerType,**kwargs): + def createNewInput(self, myInput, samplerType, rlz): """ This function will return a new input to be submitted to the model, it is called by the sampler. here only a PointSet is accepted a local copy of the values is performed. @@ -200,14 +200,13 @@ def createNewInput(self,myInput,samplerType,**kwargs): The copied values are returned as a dictionary back @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} - @ Out, ([(inputDict)],copy.deepcopy(kwargs)), tuple, return the new input in a tuple form + @ In, rlz, Realization, sample point + @ Out, ([(inputDict)],copy.deepcopy(rlz)), tuple, return the new input in a tuple form """ if len(myInput)>1: self.raiseAnError(IOError,'Only one input is accepted by the model type '+self.type+' with name'+self.name) - [(inputDict)],kwargs = super().createNewInput(myInput,samplerType,**kwargs) - return ([(inputDict)],kwargs) + [(inputDict)], rlz = super().createNewInput(myInput, samplerType, rlz) + return ([(inputDict)], rlz) def _readMoreXML(self,xmlNode): """ @@ -503,30 +502,32 @@ def _externalRun(self,inRun): return returnDict @Parallel() - def evaluateSample(self, myInput, samplerType, kwargs): + def evaluateSample(self, myInput, samplerType, rlz): """ This will evaluate an individual sample on this model. Note, parameters are needed by createNewInput and thus descriptions are copied from there. 
@ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} - @ Out, rlz, dict, This will hold two pieces of information, + @ In, rlz, Realization, Realization from whiech to build input + @ Out, out, dict, This will hold two pieces of information, the first will be the input data used to generate this sample, the second will be the output of this model given the specified inputs """ - Input = self.createNewInput(myInput, samplerType, **kwargs) + Input = self.createNewInput(myInput, samplerType, rlz) inRun = self._manipulateInput(Input[0]) # collect results from model run result = self._externalRun(inRun) # build realization # assure rlz has all metadata - self._replaceVariablesNamesWithAliasSystem(kwargs['SampledVars'] ,'input',True) - rlz = dict((var,np.atleast_1d(kwargs[var])) for var in kwargs.keys()) + self._replaceVariablesNamesWithAliasSystem(rlz ,'input',True) + # sampled values + out = dict((var,np.atleast_1d(val)) for var, val in rlz.items()) + # other sampler info + out.update(dict((var, np.atleast_1d(val)) for var, val in rlz.inputInfo.items())) # update rlz with input space from inRun and output space from result - rlz.update(dict((var,np.atleast_1d(inRun[var] if var in kwargs['SampledVars'] else result[var])) for var in set(itertools.chain(result.keys(),inRun.keys())))) - return rlz + out.update(dict((var,np.atleast_1d(inRun[var] if var in rlz else result[var])) for var in set(itertools.chain(result.keys(),inRun.keys())))) + return out def setAdditionalParams(self, params): """ diff --git a/ravenframework/Samplers/LimitSurfaceSearch.py b/ravenframework/Samplers/LimitSurfaceSearch.py index 6f1d032934..36d265fd32 100644 --- a/ravenframework/Samplers/LimitSurfaceSearch.py +++ b/ravenframework/Samplers/LimitSurfaceSearch.py @@ -755,7 +755,7 @@ def localGenerateInput(self, rlz, model, oldInput): # the probability weight here is not used, the post processor is going to recreate the grid associated and use a ROM for the probability evaluation rlz.inputInfo['ProbabilityWeight'] = rlz.inputInfo['PointProbability'] self.hangingPoints = np.vstack((self.hangingPoints,copy.copy(np.array([rlz[axis] for axis in self.axisName])))) - self.raiseADebug('At counter '+str(self.counter)+' the generated sampled variables are: '+str(self.values)) + self.raiseADebug('At counter '+str(self.counter)+' the generated sampled variables are: '+str(rlz)) rlz.inputInfo['SamplerType'] = 'LimitSurfaceSearch' rlz.inputInfo['subGridTol' ] = self.subGridTol diff --git a/ravenframework/Samplers/Stratified.py b/ravenframework/Samplers/Stratified.py index dbd6a46c71..2969127d12 100644 --- a/ravenframework/Samplers/Stratified.py +++ b/ravenframework/Samplers/Stratified.py @@ -103,8 +103,8 @@ def localInputAndChecks(self,xmlNode, paramInput): else: # correct dimensionality given self.pointByVar = pointByVar[0] - rlz.inputInfo['upper'] = {} - rlz.inputInfo['lower'] = {} + self.samplerInfo['upper'] = {} + self.samplerInfo['lower'] = {} def localInitialize(self): """ From ff2c535bbda3ad36896ea1f374aac51df047095b Mon Sep 17 00:00:00 2001 From: talbpw Date: Tue, 19 Nov 2024 14:05:57 -0700 Subject: [PATCH 09/18] WIP, CodeInterfaces need to be made Realization ready. 
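
A sketch of what "Realization ready" could mean for a code interface's input
perturbation, using a plain dict as a stand-in for the Realization's dict-like
view of the sampled variables (the function name and the $RAVEN-<var>$ token
format are illustrative assumptions, not part of this patch):

    def write_perturbed_input(templateLines, rlz):
      """
        Replaces $RAVEN-<var>$ tokens in a template with the sampled values.
        @ In, templateLines, list(str), lines of the original input file
        @ In, rlz, dict-like, map of sampled variable name -> value
        @ Out, newLines, list(str), perturbed input lines
      """
      newLines = []
      for line in templateLines:
        for var in rlz:                      # dict-style iteration over sampled variable names
          line = line.replace(f'$RAVEN-{var}$', str(rlz[var]))
        newLines.append(line)
      return newLines

    # usage sketch
    template = ['cp = $RAVEN-heat_capacity$', 'rho = $RAVEN-density$']
    sampled = {'heat_capacity': 425.0, 'density': 7.8}
    print(write_perturbed_input(template, sampled))

Run metadata that used to ride along in the kwargs dictionary (e.g. 'prefix',
'batchID') would instead be read from rlz.inputInfo, following the pattern used
by Code.createNewInput and evaluateSample below.
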
--- ravenframework/DataObjects/DataSet.py | 1 + ravenframework/DataObjects/HistorySet.py | 13 +- ravenframework/Functions.py | 29 +- ravenframework/JobHandler.py | 7 +- ravenframework/Models/Code.py | 517 ++++++++++-------- ravenframework/Models/Dummy.py | 11 +- ravenframework/Models/ExternalModel.py | 5 + ravenframework/Models/Model.py | 21 +- ravenframework/Models/PostProcessor.py | 8 +- ravenframework/Quadratures.py | 41 +- ravenframework/Samplers/EnsembleForward.py | 63 ++- ravenframework/Samplers/Sampler.py | 14 +- ravenframework/Samplers/Stratified.py | 6 +- ravenframework/Steps/MultiRun.py | 2 + ravenframework/Steps/SingleRun.py | 68 ++- .../FeatureSelection/RFE.py | 29 +- 16 files changed, 443 insertions(+), 392 deletions(-) diff --git a/ravenframework/DataObjects/DataSet.py b/ravenframework/DataObjects/DataSet.py index 5989317dd8..4c48ddca87 100644 --- a/ravenframework/DataObjects/DataSet.py +++ b/ravenframework/DataObjects/DataSet.py @@ -869,6 +869,7 @@ def vars(self): @ In, None @ Out, vars, list(str), variable names list """ + # TODO should this include indexes as well?? return self._inputs + self._outputs + self._metavars @property diff --git a/ravenframework/DataObjects/HistorySet.py b/ravenframework/DataObjects/HistorySet.py index fa444c7508..a8cb64ae02 100644 --- a/ravenframework/DataObjects/HistorySet.py +++ b/ravenframework/DataObjects/HistorySet.py @@ -15,23 +15,12 @@ Specialized implementation of DataSet to accomodate outputs that share a pivot parameter (e.g. time) """ import os -import sys -import copy -import functools import itertools -try: - import cPickle as pk -except ImportError: - import pickle as pk import xml.etree.ElementTree as ET -import abc import numpy as np -import pandas as pd -import xarray as xr -from ..BaseClasses import BaseType -from ..utils import utils, cached_ndarray, InputData, xmlUtils, mathUtils +from ..utils import mathUtils try: from .DataSet import DataSet except ValueError: #attempted relative import in non-package diff --git a/ravenframework/Functions.py b/ravenframework/Functions.py index ecbb1d8b27..14370ca4b1 100644 --- a/ravenframework/Functions.py +++ b/ravenframework/Functions.py @@ -18,20 +18,11 @@ This module contains interfaces to import external functions """ -#for future compatibility with Python 3-------------------------------------------------------------- -from __future__ import division, print_function, absolute_import -# WARNING if you import unicode_literals here, we fail tests (e.g. framework.testFactorials). This may be a future-proofing problem. 2015-04. 
-#End compatibility block for Python 3---------------------------------------------------------------- - -#External Modules------------------------------------------------------------------------------------ -#External Modules End-------------------------------------------------------------------------------- - -#Internal Modules------------------------------------------------------------------------------------ from .EntityFactoryBase import EntityFactory from .BaseClasses import BaseEntity, InputDataUser from .utils import utils, InputData, InputTypes from .CustomCommandExecuter import execCommand -#Internal Modules End-------------------------------------------------------------------------------- +from .Realizations import Realization class FunctionCollection(InputData.ParameterInput): """ @@ -173,28 +164,24 @@ def __importValues(self,myInput): @ In, myInput, object (dataObjects,dict), object from which the data need to be imported @ Out, None """ - if isinstance(myInput,dict): + if isinstance(myInput, (dict, Realization)): self.__inputFromWhat['dict'](myInput) else: self.raiseAnError(IOError,'Unknown type of input provided to the function '+str(self.name)) - def __inputFromDict(self,myInputDict): + def __inputFromDict(self, inDict): """ This is meant to be used to collect the input directly from a sampler generated input or simply from a generic dictionary - In case the input comes from a sampler the expected structure is myInputDict['SampledVars'][variable name] = value - In case it is a generic dictionary the expected structure is myInputDict[variable name] = value - @ In, myInputDict, dict, dict from which the data need to be imported + The expected structure is inDict[variable name] = value + @ In, inDict, dict, dict from which the data need to be imported @ Out, None """ - if 'SampledVars' in myInputDict.keys(): - inDict = myInputDict['SampledVars'] - else: - inDict = myInputDict for name in self.__inputVariables: - if name in inDict.keys(): + if name in inDict: + # FIXME this doesn't seem secure, and it's not even clear how it works ... 
execCommand('self.'+name+'=object["'+name+'"]',self=self,object=inDict) else: - self.raiseAnError(IOError,'The input variable '+name+' in external function seems not to be passed in') + self.raiseAnError(IOError,f'The input variable "{name}" in external function missing!') def evaluate(self,what,myInput): """ diff --git a/ravenframework/JobHandler.py b/ravenframework/JobHandler.py index ba94858e09..8efc44ed10 100644 --- a/ravenframework/JobHandler.py +++ b/ravenframework/JobHandler.py @@ -701,7 +701,9 @@ def addJobBatch(self, batch, model, modelInput, samplerType, evalFunc): if rlz.isRestart: self.addFinishedJob(rlz, metadata=rlz.inputInfo) else: - self.addJob( + # assure the realization knows about the batch it belongs to + rlz.inputInfo['batchID'] = batch.ID + self.addSingleJob( (model, modelInput, samplerType, rlz), evalFunc, rlz.inputInfo['prefix'], @@ -711,7 +713,7 @@ def addJobBatch(self, batch, model, modelInput, samplerType, evalFunc): groupInfo={'id': batch.ID, 'size': len(batch)} ) - def addJob(self, args, functionToRun, identifier, metadata=None, + def addSingleJob(self, args, functionToRun, identifier, metadata=None, forceUseThreads=False, uniqueHandler="any", clientQueue=False, groupInfo=None): """ @@ -833,7 +835,6 @@ def addClientJob(self, args, functionToRun, identifier, metadata=None, uniqueHan forceUseThreads = True, uniqueHandler = uniqueHandler, clientQueue = True, groupInfo = groupInfo) - def addFinishedJob(self, data, metadata=None, uniqueHandler="any", profile=False): """ Takes an already-finished job (for example, a restart realization) and adds it to the finished queue. diff --git a/ravenframework/Models/Code.py b/ravenframework/Models/Code.py index 1ca05d36e7..3b7957f815 100644 --- a/ravenframework/Models/Code.py +++ b/ravenframework/Models/Code.py @@ -347,68 +347,66 @@ def initialize(self,runInfoDict,inputFiles,initDict=None): # developer to modify the content of the runInfoDict self.code.initialize(copy.deepcopy(runInfoDict), self.oriInputFiles) - def createNewInput(self,currentInput,samplerType,**kwargs): + def createNewInput(self, myInput, samplerType, rlz): """ This function will return a new input to be submitted to the model, it is called by the sampler. here only a PointSet is accepted a local copy of the values is performed. For a PointSet only the last set of entries are copied. The copied values are returned as a dictionary back - @ In, currentInput, list, the inputs (list) to start from to generate the new one + @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the SampledVars'that contains a dictionary {'name variable':value} - also 'additionalEdits', similar dictionary for non-variables + @ In, rlz, Realization, Realization from whiech to build input @ Out, createNewInput, tuple, return the new input in a tuple form """ - found = False - newInputSet = copy.deepcopy(currentInput) - - #TODO FIXME I don't think the extensions are the right way to classify files anymore, with the new Files - # objects. However, this might require some updating of many Code Interfaces as well. 
- for index, inputFile in enumerate(newInputSet): - if inputFile.getExt() in self.code.getInputExtension(): - found = True - break - if not found: - self.raiseAnError(IOError,'None of the input files has one of the extensions requested by code ' - + self.subType +': ' + ' '.join(self.code.getInputExtension())) - - # check if in batch - brun = kwargs.get('batchRun') - if brun is not None: - # if batch, the subDir are a combination of prefix (batch id) and batch run id - bid = kwargs['prefix'] if 'prefix' in kwargs.keys() else '1' - subDirectory = os.path.join(self.workingDir,'b{}_r{}'.format(bid,brun)) - else: - subDirectory = os.path.join(self.workingDir, kwargs['prefix'] if 'prefix' in kwargs.keys() else '1') - - if not os.path.exists(subDirectory): - os.mkdir(subDirectory) - for index in range(len(newInputSet)): - subSubDirectory = os.path.join(subDirectory,newInputSet[index].subDirectory) + newInputSet = copy.deepcopy(myInput) + self._checkForInputFile(self.code.getInputExtension(), newInputSet) + ## set up run directory + batchID = rlz.inputInfo['batchID'] + rlzID = rlz.inputInfo['prefix'] + dirName = f'b{batchID}_r{rlzID}' + # FIXME we're always in batch! + subDir = os.path.join(self.workingDir, dirName) + # OLD # + # brun = kwargs.get('batchRun') + # if brun is not None: + # # if batch, the subDir are a combination of prefix (batch id) and batch run id + # bid = kwargs['prefix'] if 'prefix' in kwargs.keys() else '1' + # subDirectory = os.path.join(self.workingDir,'b{}_r{}'.format(bid,brun)) + # else: + # subDirectory = os.path.join(self.workingDir, kwargs['prefix'] if 'prefix' in kwargs.keys() else '1') + + if not os.path.exists(subDir): + os.mkdir(subDir) + + for n, newInput in enumerate(newInputSet): + subSubDir = os.path.join(subDir, newInput.subDirectory) ## Currently, there are no tests that verify the lines below can be hit ## It appears that the folders already exist by the time we get here, ## this could change, so we will leave this code here. ## -- DPM 8/2/17 - if newInputSet[index].subDirectory.strip() != "" and not os.path.exists(subSubDirectory): - utils.makeDir(subSubDirectory) + if newInput.subDirectory.strip() != "" and not os.path.exists(subSubDir): + utils.makeDir(subSubDir) ########################################################################## - newInputSet[index].setPath(subSubDirectory) - shutil.copy(self.oriInputFiles[index].getAbsFile(),subSubDirectory) + newInput.setPath(subSubDir) + shutil.copy(self.oriInputFiles[n].getAbsFile(), subSubDir) - kwargs['subDirectory'] = subDirectory - kwargs['alias'] = self.alias + rlz.inputInfo['subDirectory'] = subDir + rlz.inputInfo['alias'] = self.alias - if 'SampledVars' in kwargs.keys(): - sampledVars = self._replaceVariablesNamesWithAliasSystem(kwargs['SampledVars'],'input',False) + self._replaceVariablesNamesWithAliasSystem(rlz, 'input', False) + # OLD # + # if 'SampledVars' in kwargs.keys(): + # sampledVars = self._replaceVariablesNamesWithAliasSystem(kwargs['SampledVars'],'input',False) - newInput = self.code.createNewInput(newInputSet,self.oriInputFiles,samplerType,**copy.deepcopy(kwargs)) + # FIXME do we force all Codes to update to this new format, or do we grandfather in somehow? 
+ # OLD newInput = self.code.createNewInput(newInputSet, self.oriInputFiles, samplerType, **copy.deepcopy(kwargs)) + newInput = self.code.createNewInput(newInputSet, self.oriInputFiles, samplerType, rlz) - if 'SampledVars' in kwargs.keys() and len(self.alias['input'].keys()) != 0: - kwargs['SampledVars'] = sampledVars + # if 'SampledVars' in kwargs.keys() and len(self.alias['input'].keys()) != 0: + # kwargs['SampledVars'] = sampledVars - return (newInput,kwargs) + return (newInput, rlz) def _expandCommand(self, origCommand): """ @@ -488,22 +486,34 @@ def findMsys(): return origCommand @Parallel() - def evaluateSample(self, myInput, samplerType, kwargs): + def evaluateSample(self, myInput, samplerType, rlz): """ This will evaluate an individual sample on this model. Note, parameters are needed by createNewInput and thus descriptions are copied from there. @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars that contains a dictionary {'name variable':value} + @ In, rlz, Realization, Realization from whiech to build input @ Out, returnValue, tuple, This will hold two pieces of information, the first item will be the input data used to generate this sample, the second item will be the output of this model given the specified inputs """ - inputFiles = self.createNewInput(myInput, samplerType, **kwargs) - self.currentInputFiles, metaData = (copy.deepcopy(inputFiles[0]),inputFiles[1]) if type(inputFiles).__name__ == 'tuple' else (inputFiles, None) - returnedCommand = self.code.genCommand(self.currentInputFiles,self.executable, flags=self.clargs, fileArgs=self.fargs, preExec=self.preExec) + print('DEBUGG rlz type:', type(rlz)) + inputFiles = self.createNewInput(myInput, samplerType, rlz) + if isinstance(inputFiles, tuple): + # FIXME why is this a class variable? Should it be only within this method scope instead? 
+ self.currentInputFiles = copy.deepcopy(inputFiles[0]) + metaData = copy.deepcopy(inputFiles[1]) + else: + self.currentInputFiles = inputFiles + metaData = None + # OLD # + # self.currentInputFiles, metaData = (copy.deepcopy(inputFiles[0]),inputFiles[1]) if type(inputFiles).__name__ == 'tuple' else (inputFiles, None) + returnedCommand = self.code.genCommand(self.currentInputFiles, + self.executable, + flags=self.clargs, + fileArgs=self.fargs, + preExec=self.preExec) ## Given that createNewInput can only return a tuple, I don't think these ## checks are necessary (keeping commented out until someone else can verify): @@ -513,11 +523,12 @@ def evaluateSample(self, myInput, samplerType, kwargs): # self.raiseAnError(IOError, "the first entry in tuple returned by generateCommand method needs to be a list of tuples!") executeCommand, self.outFileRoot = returnedCommand - precommand = kwargs['precommand'] - postcommand = kwargs['postcommand'] - bufferSize = kwargs['logfileBuffer'] - fileExtensionsToDelete = kwargs['deleteOutExtension'] - deleteSuccessfulLogFiles = kwargs['delSucLogFiles'] + info = rlz.inputInfo + precommand = info['precommand'] + postcommand = info['postcommand'] + bufferSize = info['logfileBuffer'] + fileExtensionsToDelete = info['deleteOutExtension'] + deleteSuccessfulLogFiles = info['delSucLogFiles'] codeLogFile = self.outFileRoot if codeLogFile is None: @@ -526,119 +537,65 @@ def evaluateSample(self, myInput, samplerType, kwargs): ## Before we were temporarily changing directories in order to copy the ## correct directory to the subprocess. Instead, we can just set the ## directory after we copy it over. -- DPM 5/5/2017 - sampleDirectory = os.path.join(os.getcwd(),metaData['subDirectory']) + sampleDirectory = os.path.join(os.getcwd(), metaData['subDirectory']) localenv = dict(os.environ) localenv['PWD'] = str(sampleDirectory) - outFileObject = open(os.path.join(sampleDirectory,codeLogFile), 'w', bufferSize) - - found = False - for index, inputFile in enumerate(self.currentInputFiles): - if inputFile.getExt() in self.code.getInputExtension(): - found = True - break - if not found: - self.raiseAnError(IOError,'None of the input files has one of the extensions requested by code ' - + self.subType +': ' + ' '.join(self.getInputExtension())) - commands=[] - for runtype,cmd in executeCommand: - newCommand='' - if runtype.lower() == 'parallel': - newCommand += precommand - newCommand += cmd+' ' - newCommand += postcommand - commands.append(newCommand) - elif runtype.lower() == 'serial': - commands.append(cmd) + toOpen = os.path.join(sampleDirectory,codeLogFile) + with open(toOpen, 'w', bufferSize, encoding='utf-8') as outFileObject: + # check that the desired input file is the one we expect (using extensions) + self._checkForInputFile(self.code.getInputExtension(), self.currentInputFiles) + # collect the run command components + + # assemble the execution command + command = self._assembleCommand(executeCommand, precommand, postcommand, info, sampleDirectory) + self.raiseAMessage('Execution command submitted:',command) + # extend the command as needed for e.g. 
operating systems + if platform.system() == 'Windows': + command = self._expandForWindows(command) + self.raiseAMessage("modified command to", repr(command)) + for key, value in localenv.items(): + localenv[key]=str(value) + elif not self.code.getRunOnShell(): + command = self._expandCommand(command) + self.raiseADebug(f'shell execution command: "{command}"') + self.raiseADebug('shell cwd: "'+localenv['PWD']+'"') + self.raiseADebug('self pid:' + str(os.getpid())+' ppid: '+str(os.getppid())) + ## reset python path + localenv.pop('PYTHONPATH',None) + ## This code should be evaluated by the job handler, so it is fine to wait + ## until the execution of the external subprocess completes. + process = utils.pickleSafeSubprocessPopen(command, shell=self.code.getRunOnShell(), stdout=outFileObject, stderr=outFileObject, cwd=localenv['PWD'], env=localenv) + if self.maxWallTime is not None: + timeout = time.time() + self.maxWallTime + while True: + time.sleep(0.5) + process.poll() + if time.time() > timeout and process.returncode is None: + self.raiseAWarning('walltime exceeded in run in working dir: '+str(metaData['subDirectory'])+'. Killing the run...') + process.kill() + process.returncode = -1 + if process.returncode is not None or time.time() > timeout: + break else: - self.raiseAnError(IOError,'For execution command <'+cmd+'> the run type was neither "serial" nor "parallel"! Instead received: ', - runtype,'\nPlease check the code interface.') - - command = f' {self.commandSeparator} '.join(commands)+' ' - - command = command.replace("%INDEX%",kwargs['INDEX']) - command = command.replace("%INDEX1%",kwargs['INDEX1']) - command = command.replace("%CURRENT_ID%",kwargs['CURRENT_ID']) - command = command.replace("%CURRENT_ID1%",kwargs['CURRENT_ID1']) - command = command.replace("%SCRIPT_DIR%",kwargs['SCRIPT_DIR']) - command = command.replace("%FRAMEWORK_DIR%",kwargs['FRAMEWORK_DIR']) - ## Note this is the working directory that the subprocess will use, it is - ## not the directory I am currently working. This bit me as I moved the code - ## from the old ExternalRunner because in that case this was filled in after - ## the process was submitted by the process itself. -- DPM 5/4/17 - command = command.replace("%WORKING_DIR%",sampleDirectory) - command = command.replace("%BASE_WORKING_DIR%",kwargs['BASE_WORKING_DIR']) - command = command.replace("%METHOD%",kwargs['METHOD']) - command = command.replace("%NUM_CPUS%",kwargs['NUM_CPUS']) - command = command.replace("%PYTHON%", sys.executable) - - if "raven_framework" in sys.executable: - ravenExecutable = sys.executable - elif "python" in os.path.basename(sys.executable) \ - and "raven_framework" in sys.argv[0] \ - and sys.argv[0].endswith(".py"): - # command was "python path/to/raven_framework.py ..." - ravenExecutable = f"{sys.executable} {sys.argv[0]}" - else: - ravenExecutable = '' + process.wait() + + returnCode = process.returncode + self.raiseADebug(" Process "+str(process.pid)+" finished "+time.ctime()+ + " with returncode "+str(process.returncode)) + # procOutput = process.communicate()[0] + + ## If the returnCode is already non-zero, we should maintain our current + ## value as it may have some meaning that can be parsed at some point, so + ## only set the returnCode to -1 in here if we did not already catch the + ## failure. 
+ if returnCode == 0 and 'checkForOutputFailure' in dir(self.code): + codeFailed = self.code.checkForOutputFailure(codeLogFile, metaData['subDirectory']) + if codeFailed: + returnCode = -1 + # close the log file + # OLD outFileObject.close() + ## END "with open outFileObject" context - if "%RAVENEXECUTABLE%" in command and ravenExecutable == '': - message = f"""The command contains %RAVENEXECUTABLE% but the way the outer framework was run - could not be inferred. Only using scripts or executables that contain 'raven_framework' or - using python to run a .py file with 'raven_framework' in the name is supported. Possibilities - considered were: - 1. 'raven_framework' in sys.executable (received: {sys.executable}) - 2. 'raven_framework' or 'raven_framework.py' in sys.argv[0] (received: {sys.argv[0]}) - Note that users may also directly specify the path to an appropriate raven_framework - executable instead of using the %RAVENEXECUTABLE% placeholder.""" - self.raiseAnError(IOError, message) - - command = command.replace("%RAVENEXECUTABLE%", ravenExecutable) - - self.raiseAMessage('Execution command submitted:',command) - if platform.system() == 'Windows': - command = self._expandForWindows(command) - self.raiseAMessage("modified command to", repr(command)) - for key, value in localenv.items(): - localenv[key]=str(value) - elif not self.code.getRunOnShell(): - command = self._expandCommand(command) - self.raiseADebug(f'shell execution command: "{command}"') - self.raiseADebug('shell cwd: "'+localenv['PWD']+'"') - self.raiseADebug('self pid:' + str(os.getpid())+' ppid: '+str(os.getppid())) - ## reset python path - localenv.pop('PYTHONPATH',None) - ## This code should be evaluated by the job handler, so it is fine to wait - ## until the execution of the external subprocess completes. - process = utils.pickleSafeSubprocessPopen(command, shell=self.code.getRunOnShell(), stdout=outFileObject, stderr=outFileObject, cwd=localenv['PWD'], env=localenv) - if self.maxWallTime is not None: - timeout = time.time() + self.maxWallTime - while True: - time.sleep(0.5) - process.poll() - if time.time() > timeout and process.returncode is None: - self.raiseAWarning('walltime exceeded in run in working dir: '+str(metaData['subDirectory'])+'. Killing the run...') - process.kill() - process.returncode = -1 - if process.returncode is not None or time.time() > timeout: - break - else: - process.wait() - - returnCode = process.returncode - self.raiseADebug(" Process "+str(process.pid)+" finished "+time.ctime()+ - " with returncode "+str(process.returncode)) - # procOutput = process.communicate()[0] - - ## If the returnCode is already non-zero, we should maintain our current - ## value as it may have some meaning that can be parsed at some point, so - ## only set the returnCode to -1 in here if we did not already catch the - ## failure. 
- if returnCode == 0 and 'checkForOutputFailure' in dir(self.code): - codeFailed = self.code.checkForOutputFailure(codeLogFile, metaData['subDirectory']) - if codeFailed: - returnCode = -1 - # close the log file - outFileObject.close() ## We should try and use the output the code interface gives us first, but ## in lieu of that we should fall back on the standard output of the code ## (Which was deleted above in some cases, so I am not sure if this was @@ -651,7 +608,7 @@ def evaluateSample(self, myInput, samplerType, kwargs): if 'finalizeCodeOutput' in dir(self.code) and returnCode == 0: finalCodeOutput = self.code.finalizeCodeOutput(command, codeLogFile, metaData['subDirectory']) ## Special case for RAVEN interface --ALFOA 09/17/17 - ravenCase = type(finalCodeOutput).__name__ == 'dict' and self.code.__class__.__name__ == 'RAVEN' + ravenCase = isinstance(finalCodeOutput, dict) and self.code.__class__.__name__ == 'RAVEN' # check return of finalizecode output if finalCodeOutput is not None: isDict = isinstance(finalCodeOutput,dict) @@ -674,11 +631,12 @@ def evaluateSample(self, myInput, samplerType, kwargs): outFile.initialize(outputFile+'.csv', path=metaData['subDirectory']) csvLoader = CsvLoader.CsvLoader() - # does this CodeInterface have sufficiently intense (or limited) CSV files that - # it needs to assume floats and use numpy, or can we use pandas? loadUtility = self.code.getCsvLoadUtil() csvData = csvLoader.loadCsvFile(outFile.getAbsFile(), nullOK=False, utility=loadUtility) returnDict = csvLoader.toRealization(csvData) + #else: + # # FIXME returnDict is not defined if we get here! + # self.raiseAnError(RuntimeError, 'This should not be reached.') if not ravenCase: # check if the csv needs to be printed @@ -686,8 +644,8 @@ def evaluateSample(self, myInput, samplerType, kwargs): csvFileName = os.path.join(metaData['subDirectory'],outputFile+'.csv') pd.DataFrame.from_dict(returnDict).to_csv(path_or_buf=csvFileName,index=False) self._replaceVariablesNamesWithAliasSystem(returnDict, 'inout', True) - returnDict.update(kwargs) - returnValue = (kwargs['SampledVars'],returnDict) + returnDict.update(info) + returnValue = (rlz ,returnDict) exportDict = self.createExportDictionary(returnValue) else: # we have the DataObjects -> raven-runs-raven case only so far @@ -700,51 +658,53 @@ def evaluateSample(self, myInput, samplerType, kwargs): exportDict = {'RAVEN_isBatch':True,'realizations':[]} ## set up each realization for n in range(numRlz): - rlz = {} + out = {} ## collect the results from INNER, both point set and history set for dataObj in finalCodeOutput.values(): # TODO FIXME check for overwriting data. For now just replace data if it's duplicate! new = dict((var,np.atleast_1d(val)) for var,val in dataObj.realization(index=n,unpackXArray=True).items()) - rlz.update( new ) + out.update( new ) ## add OUTER input space # TODO FIXME check for overwriting data. For now just replace data if it's duplicate! - new = dict((var,np.atleast_1d(val)) for var,val in kwargs['SampledVars'].items()) - rlz.update( new ) + new = dict((var,np.atleast_1d(val)) for var,val in rlz.items()) + out.update( new ) ## combine ProbabilityWeights # TODO FIXME these are a rough attempt at getting it right! 
- rlz['ProbabilityWeight'] = np.atleast_1d(rlz.get('ProbabilityWeight',1.0) * kwargs.get('ProbabilityWeight',1.0)) - rlz['PointProbability'] = np.atleast_1d(rlz.get('PointProbability',1.0) * kwargs.get('PointProbability',1.0)) + out['ProbabilityWeight'] = np.atleast_1d(rlz.get('ProbabilityWeight',1.0) * rlz.inputInfo.get('ProbabilityWeight',1.0)) + out['PointProbability'] = np.atleast_1d(rlz.get('PointProbability',1.0) * rlz.inputInfo.get('PointProbability',1.0)) # FIXME: adding "_n" to Optimizer samples scrambles its ability to find evaluations! ## temporary fix: only append if there's multiple realizations, and error out if sampler is an optimizer. if numRlz > 1: - if '_' in kwargs['prefix']: + # FIXME this is VERY implicit checking!!! + if '_' in info['prefix']: self.raiseAnError(RuntimeError,'OUTER RAVEN is using an OPTIMIZER, but INNER RAVEN is returning multiple realizations!') addon = '_{}'.format(n) else: addon = '' - rlz['prefix'] = np.atleast_1d(kwargs['prefix']+addon) + out['prefix'] = np.atleast_1d(info['prefix']+addon) ## add the rest of the metadata # TODO slow - for var,val in kwargs.items(): - if var not in rlz.keys(): - rlz[var] = np.atleast_1d(val) - self._replaceVariablesNamesWithAliasSystem(rlz,'inout',True) - exportDict['realizations'].append(rlz) + for var,val in rlz.asDict.items(): + if var not in out.keys(): + out[var] = np.atleast_1d(val) + self._replaceVariablesNamesWithAliasSystem(out, 'inout', True) + exportDict['realizations'].append(out) ## The last thing before returning should be to delete the temporary log ## file and any other file the user requests to be cleared if deleteSuccessfulLogFiles: - self.raiseAMessage(' Run "' +kwargs['prefix']+'" ended smoothly, removing log file!') + self.raiseAMessage(f' Run "{info["prefix"]}" ended smoothly, removing log file!') codeLofFileFullPath = os.path.join(metaData['subDirectory'],codeLogFile) if os.path.exists(codeLofFileFullPath): os.remove(codeLofFileFullPath) ## Check if the user specified any file extensions for clean up for fileExt in fileExtensionsToDelete: - fileList = [ os.path.join(metaData['subDirectory'],f) for f in os.listdir(metaData['subDirectory']) if f.endswith(fileExt) ] + fileList = [os.path.join(metaData['subDirectory'],f) + for f in os.listdir(metaData['subDirectory']) + if f.endswith(fileExt)] for f in fileList: os.remove(f) - return exportDict - + # END if run successful else: self.raiseAMessage("*"*50) self.raiseAMessage(" Process Failed "+str(command)+" returnCode "+str(returnCode)) @@ -757,10 +717,89 @@ def evaluateSample(self, myInput, samplerType, kwargs): else: self.raiseAMessage(" No output " + absOutputFile) self.raiseAMessage("*"*50) - - ## If you made it here, then the run must have failed return None + def _checkForInputFile(self, extension, fileList): + """ + Check the relevant input file is available + @ In, extension, str, desired file extension + @ In, fileList, list, list of existing files to search within + @ Out, None + """ + #TODO FIXME I don't think the extensions are the right way to classify files anymore, with the new Files + # objects. However, this might require some updating of many Code Interfaces as well. 
+ for _, inputFile in enumerate(fileList): + if inputFile.getExt() in extension: + break + else: + self.raiseAnError(IOError,'None of the input files has one of the extensions requested by' + + f' code {self.subType}: ' + ' '.join(extension)) + return inputFile + + def _assembleCommand(self, executeCommand, preCommand, postCommand, info, sampleDir): + """ + Compiles the run command and replaces string variables. + @ In, executeCommand, list, portions of run command + @ In, preCommand, str, command to prepend to execution + @ In, postCommand, str, command to add to end of execution + @ In, info, dict, collection of available data + @ In, sampleDir, str, directory in which run is occuring + @ Out, command, str, run command + """ + # collect command elements + commands=[] + for runtype,cmd in executeCommand: + newCommand='' + if runtype.lower() == 'parallel': + newCommand += preCommand + newCommand += cmd+' ' + newCommand += postCommand + commands.append(newCommand) + elif runtype.lower() == 'serial': + commands.append(cmd) + else: + self.raiseAnError(IOError,f'For execution command <{cmd}> the run type was neither "serial" nor "parallel"!' +\ + f'Instead received: "{runtype}" \nPlease check the code interface.') + # assemble command + command = f' {self.commandSeparator} '.join(commands)+' ' + command = command.replace("%INDEX%", info['INDEX']) + command = command.replace("%INDEX1%", info['INDEX1']) + command = command.replace("%CURRENT_ID%", info['CURRENT_ID']) + command = command.replace("%CURRENT_ID1%", info['CURRENT_ID1']) + command = command.replace("%SCRIPT_DIR%", info['SCRIPT_DIR']) + command = command.replace("%FRAMEWORK_DIR%", info['FRAMEWORK_DIR']) + ## Note this is the working directory that the subprocess will use, it is + ## not the directory I am currently working. This bit me as I moved the code + ## from the old ExternalRunner because in that case this was filled in after + ## the process was submitted by the process itself. -- DPM 5/4/17 + command = command.replace("%WORKING_DIR%", sampleDir) + command = command.replace("%BASE_WORKING_DIR%", info['BASE_WORKING_DIR']) + command = command.replace("%METHOD%", info['METHOD']) + command = command.replace("%NUM_CPUS%", info['NUM_CPUS']) + command = command.replace("%PYTHON%", sys.executable) + # handle RAVEN executable if part of command + if "raven_framework" in sys.executable: + ravenExecutable = sys.executable + elif "python" in os.path.basename(sys.executable) \ + and "raven_framework" in sys.argv[0] \ + and sys.argv[0].endswith(".py"): + # command was "python path/to/raven_framework.py ..." + ravenExecutable = f"{sys.executable} {sys.argv[0]}" + else: + ravenExecutable = '' + if "%RAVENEXECUTABLE%" in command and ravenExecutable == '': + message = f"""The command contains %RAVENEXECUTABLE% but the way the outer framework was run + could not be inferred. Only using scripts or executables that contain 'raven_framework' or + using python to run a .py file with 'raven_framework' in the name is supported. Possibilities + considered were: + 1. 'raven_framework' in sys.executable (received: {sys.executable}) + 2. 
'raven_framework' or 'raven_framework.py' in sys.argv[0] (received: {sys.argv[0]}) + Note that users may also directly specify the path to an appropriate raven_framework + executable instead of using the %RAVENEXECUTABLE% placeholder.""" + self.raiseAnError(IOError, message) + command = command.replace("%RAVENEXECUTABLE%", ravenExecutable) + return command + def createExportDictionary(self, evaluation): """ Method that is aimed to create a dictionary with the sampled and output variables that can be collected by the different @@ -887,7 +926,7 @@ def collectOutputFromDataObject(self,exportDict,output): #TODO: Seems to me, this function can be removed --- wangc Dec. 2017 - def collectOutputFromDict(self,exportDict,output,options=None): + def collectOutputFromDict(self, exportDict, output, options=None): """ Collect results from dictionary @ In, exportDict, dict, contains 'inputs','outputs','metadata' @@ -895,6 +934,7 @@ def collectOutputFromDict(self,exportDict,output,options=None): @ In, options, dict, optional, dictionary of options that can be passed in when the collect of the output is performed by another model (e.g. EnsembleModel) @ Out, None """ + DEPRECABLE prefix = exportDict.pop('prefix',None) if 'inputSpaceParams' in exportDict.keys(): inKey = 'inputSpaceParams' @@ -913,78 +953,83 @@ def collectOutputFromDict(self,exportDict,output,options=None): return - def submit(self, myInput, samplerType, jobHandler, **kwargs): + def submit(self, batch, myInput, samplerType, jobHandler): """ This will submit an individual sample to be evaluated by this model to a specified jobHandler. Note, some parameters are needed by createNewInput and thus descriptions are copied from there. + @ In, batch, RealizationBatch, list of realizations to submit as jobs @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input @ In, jobHandler, JobHandler instance, the global job handler instance - @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} @ Out, None """ - nRuns = 1 - batchMode = kwargs.get("batchMode", False) - if batchMode: - nRuns = kwargs["batchInfo"]['nRuns'] - - for i in range(nRuns): - if batchMode: - kw = kwargs['batchInfo']['batchRealizations'][i] - else: - kw = kwargs - - prefix = kw.get("prefix") - uniqueHandler = kw.get("uniqueHandler",'any') - # if batch mode is on, lets record the run id within the batch - if batchMode: - kw['batchRun'] = i+1 + # OLD + # nRuns = len(batch) + # batchMode = kwargs.get("batchMode", False) + # if batchMode: + # nRuns = kwargs["batchInfo"]['nRuns'] + + for r, rlz in enumerate(batch): + #shortcut for convenience + info = rlz.inputInfo + # UNUSED prefix = info['prefix'] + # FIXME find out who uses this and update where that info gets stored + # -> looks like SupervisedLearning/FeatureSelection/RFE might be the only one + # -> but it also looks like Samplers/StochasticCollocation should be using it? + # -> Should uniqueHandler actually be "requester" and formalized? + # -> Should uniqueHandler be part of the Model at all, or just jobHandler? + # UNUSED uniqueHandler = rlz.inputInfo.get('uniqueHandler', 'any') ## These two are part of the current metadata, so they will be added before ## the job is started, so that they will be captured in the metadata and match ## the current behavior of the system. 
If these are not desired, then this ## code can be moved to later. -- DPM 4/12/17 - kw['executable'] = self.executable - kw['outfile'] = None + # -> carried into the new batching system, PWT 2024-11 + info['executable'] = self.executable + info['outfile'] = None #TODO FIXME I don't think the extensions are the right way to classify files anymore, with the new Files - # objects. However, this might require some updating of many CodeInterfaces`````` 1 Interfaces as well. + # objects. However, this might require some updating of many CodeInterfaces, Interfaces as well. for index, inputFile in enumerate(myInput): if inputFile.getExt() in self.code.getInputExtension(): - kw['outfile'] = 'out~'+myInput[index].getBase() + info['outfile'] = 'out~'+myInput[index].getBase() break - if kw['outfile'] is None: - self.raiseAnError(IOError,'None of the input files has one of the extensions requested by code ' - + self.subType +': ' + ' '.join(self.code.getInputExtension())) - + if info['outfile'] is None: + self.raiseAnError(IOError, + 'None of the input files has one of the extensions requested by code ' +\ + f'{self.subType}: {" ".join(self.code.getInputExtension())}') ## These kw are updated by createNewInput, so the job either should not ## have access to the metadata, or it needs to be updated from within the ## evaluateSample function, which currently is not possible since that ## function does not know about the job instance. - metadata = copy.copy(kw) - + # UNUSED metadata = copy.copy(info) ## These variables should not be part of the metadata, so add them after ## we copy this dictionary (Caught this when running an ensemble model) ## -- DPM 4/11/17 - nodesList = jobHandler.runInfoDict.get('Nodes',[]) - kw['logfileBuffer' ] = jobHandler.runInfoDict['logfileBuffer'] - kw['precommand' ] = jobHandler.runInfoDict['precommand'] - kw['postcommand' ] = jobHandler.runInfoDict['postcommand'] - kw['delSucLogFiles' ] = jobHandler.runInfoDict['delSucLogFiles'] - kw['deleteOutExtension'] = jobHandler.runInfoDict['deleteOutExtension'] - kw['NumMPI' ] = jobHandler.runInfoDict.get('NumMPI',1) - kw['numberNodes' ] = len(nodesList) - - ## This may look a little weird, but due to how the parallel python library - ## works, we are unable to pass a member function as a job because the - ## pp library loses track of what self is, so instead we call it from the - ## class and pass self in as the first parameter - jobHandler.addJob((self, myInput, samplerType, kw), self.__class__.evaluateSample, prefix, metadata=metadata, - uniqueHandler=uniqueHandler, groupInfo={'id': kwargs['batchInfo']['batchId'], 'size': nRuns} if batchMode else None) - if nRuns == 1: - self.raiseAMessage('job "' + str(prefix) + '" submitted!') - else: - self.raiseAMessage('job "' + str(i+1) + '" in batch "'+str(kwargs['batchInfo']['batchId']) + '" submitted!') + nodesList = jobHandler.runInfoDict.get('Nodes',[]) + info['logfileBuffer' ] = jobHandler.runInfoDict['logfileBuffer'] + info['precommand' ] = jobHandler.runInfoDict['precommand'] + info['postcommand' ] = jobHandler.runInfoDict['postcommand'] + info['delSucLogFiles' ] = jobHandler.runInfoDict['delSucLogFiles'] + info['deleteOutExtension'] = jobHandler.runInfoDict['deleteOutExtension'] + info['NumMPI' ] = jobHandler.runInfoDict.get('NumMPI',1) + info['numberNodes' ] = len(nodesList) + + self.raiseAMessage(f'batch "{batch.ID}" job {r} "{info["prefix"]}" submitted!') + # OLD # + # jobHandler.addJob((self, myInput, samplerType, rlz), + # self.__class__.evaluateSample, + # prefix, + # 
metadata=metadata, + # uniqueHandler=uniqueHandler, + # groupInfo=groupInfo) + # submit batch of jobs together + ## This may look a little weird, but due to how the parallel python library + ## works, we are unable to pass a member function as a job because the + ## pp library loses track of what self is, so instead we call it from the + ## class and pass self in as the first parameter + jobHandler.addJobBatch(batch, self, myInput, samplerType, self.__class__.evaluateSample) + + diff --git a/ravenframework/Models/Dummy.py b/ravenframework/Models/Dummy.py index a748548a3e..cb2156a63c 100644 --- a/ravenframework/Models/Dummy.py +++ b/ravenframework/Models/Dummy.py @@ -194,12 +194,13 @@ def evaluateSample(self, myInput, samplerType, rlz): self._replaceVariablesNamesWithAliasSystem(inRun,'input',True) self._replaceVariablesNamesWithAliasSystem(rlz,'input',True) # build realization using input space from inRun and metadata from kwargs - rlz = dict((var,np.atleast_1d(inRun[var] if var in rlz else rlz.inputInfo)) for var in set(itertools.chain(rlz.keys(),inRun.keys()))) + out = dict((var,np.atleast_1d(val)) for var, val in inRun.items()) + out.update((var, np.atleast_1d(val)) for var, val in rlz.asDict().items()) # add dummy output space - rlz['OutputPlaceHolder'] = np.atleast_1d(float(Input[1]['prefix'])) - return rlz + out['OutputPlaceHolder'] = np.atleast_1d(float(Input[1].inputInfo['prefix'])) + return out - def collectOutput(self,finishedJob,output,options=None): + def collectOutput(self, finishedJob, output, options=None): """ Method that collects the outputs from the previous run @ In, finishedJob, InternalRunner object, instance of the run just finished @@ -217,7 +218,7 @@ def collectOutput(self,finishedJob,output,options=None): output.addRealization(result) # END can be abstracted to base class - def collectOutputFromDict(self,exportDict,output,options=None): + def collectOutputFromDict(self, exportDict, output, options=None): """ Collect results from a dictionary @ In, exportDict, dict, contains 'inputSpaceParams','outputSpaceParams','metadata' diff --git a/ravenframework/Models/ExternalModel.py b/ravenframework/Models/ExternalModel.py index 2149c88ad5..bab609570f 100644 --- a/ravenframework/Models/ExternalModel.py +++ b/ravenframework/Models/ExternalModel.py @@ -336,6 +336,11 @@ def evaluateSample(self, myInput, samplerType, rlz): res.update(dict((var, np.atleast_1d(val)) for var, val in rlz.items())) if '_indexMap' in res: res['_indexMap'].update(evalIndexMap) + print('DEBUGG what is here?') + print('DEBUGG indexMap:', res.get('_indexMap', None)) + print('DEBUGG res:') + for k, v in res.items(): + print('DEBUGG ... 
', k, v) return res def collectOutput(self,finishedJob,output,options=None): diff --git a/ravenframework/Models/Model.py b/ravenframework/Models/Model.py index 4e92f9851c..9fbeeec389 100644 --- a/ravenframework/Models/Model.py +++ b/ravenframework/Models/Model.py @@ -288,7 +288,7 @@ def _getVariableList(self, type): def _replaceVariablesNamesWithAliasSystem(self, sampledVars, aliasType='input', fromModelToFramework=False): """ Method to convert kwargs Sampled vars with the alias system - @ In, sampledVars, dict or list, dictionary or list that are going to be modified + @ In, sampledVars, dict/list/Realization, dictionary or list that are going to be modified @ In, aliasType, str, optional, type of alias to be replaced @ In, fromModelToFramework, bool, optional, When we define aliases for some input variables, we need to be sure to convert the variable names (if alias is of type input) coming from RAVEN (e.g. sampled variables) into the corresponding names @@ -304,20 +304,21 @@ def _replaceVariablesNamesWithAliasSystem(self, sampledVars, aliasType='input', else: listAliasType = [aliasType] originalVariables = copy.deepcopy(sampledVars) + notFound = 2**62 # ??? Magic var? for aliasTyp in listAliasType: for varFramework,varModel in self.alias[aliasTyp].items(): whichVar = varModel if fromModelToFramework else varFramework - notFound = 2**62 - if type(originalVariables).__name__ != 'list': + if isinstance(originalVariables, list): + if whichVar in sampledVars: + sampledVars[sampledVars.index(whichVar)] = varFramework if fromModelToFramework else varModel + else: + # rlz behaves like a dict, so same algo works for both found = sampledVars.pop(whichVar,[notFound]) if not np.array_equal(np.asarray(found), [notFound]): if fromModelToFramework: sampledVars[varFramework] = originalVariables[varModel] else: - sampledVars[varModel] = originalVariables[varFramework] - else: - if whichVar in sampledVars: - sampledVars[sampledVars.index(whichVar)] = varFramework if fromModelToFramework else varModel + sampledVars[varModel] = originalVariables[varFramework] return originalVariables def _handleInput(self, paramInput): @@ -409,16 +410,14 @@ def serialize(self,fileObjIn,**kwargs): fileObj.close() @abc.abstractmethod - def createNewInput(self,myInput,samplerType,**kwargs): + def createNewInput(self, myInput, samplerType, rlz): """ This function will return a new input to be submitted to the model, it is called by the sampler. 
@ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} + @ In, rlz, Realization, point in sample space to evaluate @ Out, [(kwargs)], list, return the new input in a list form """ - return [(copy.copy(kwargs))] def submit(self, batch, myInput, samplerType, jobHandler): """ diff --git a/ravenframework/Models/PostProcessor.py b/ravenframework/Models/PostProcessor.py index b997f41990..33fa2297f0 100644 --- a/ravenframework/Models/PostProcessor.py +++ b/ravenframework/Models/PostProcessor.py @@ -207,11 +207,12 @@ def evaluateSample(self, myInput, samplerType, kwargs): returnValue = (ppInput, self._pp.run(ppInput)) return returnValue - def submit(self, myInput, samplerType, jobHandler, **kwargs): + def submit(self, batch, myInput, samplerType, jobHandler): """ This will submit an individual sample to be evaluated by this model to a specified jobHandler. Note, some parameters are needed by createNewInput and thus descriptions are copied from there. + @ In, batch, RealizationBatch, list of realizations to submit as jobs @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, passing through (consistent with base class but not used) @ In, jobHandler, JobHandler instance, the global job handler instance @@ -220,8 +221,9 @@ def submit(self, myInput, samplerType, jobHandler, **kwargs): and output of the PostProcessor, as well as other control options for the PostProcessor @ Out, None """ - kwargs['forceThreads'] = True - super().submit(myInput, samplerType, jobHandler, **kwargs) + for rlz in batch: + rlz.inputInfo['forceThreads'] = True + super().submit(batch, myInput, samplerType, jobHandler) def collectOutput(self, finishedJob, output): """ diff --git a/ravenframework/Quadratures.py b/ravenframework/Quadratures.py index fa13a2964f..42a5558088 100644 --- a/ravenframework/Quadratures.py +++ b/ravenframework/Quadratures.py @@ -16,9 +16,7 @@ @author: talbpw """ -#for future compatibility with Python 3----------------------------------------------- -from __future__ import division, print_function, unicode_literals, absolute_import -#End compatibility block for Python 3------------------------------------------------- +import time #External Modules--------------------------------------------------------------------- import numpy as np @@ -449,53 +447,52 @@ def initialize(self, varNames, indexSet, distDict, quadDict, handler): if handler!=None: self.parallelSparseQuadGen(handler) else: - for j,cof in enumerate(self.c): + for j, cof in enumerate(self.c): idx = self.indexSet[j] m = self.quadRule(idx)+1 - new = self.tensorGrid.original_function(self, m) + new = self.tensorGrid.original_function(self, m) for i in range(len(new[0])): newpt=tuple(new[0][i]) - newwt=new[1][i]*cof + newwt=new[1][i] * cof if newpt in self.SG.keys(): self.SG[newpt]+=newwt else: self.SG[newpt] = newwt - def parallelSparseQuadGen(self,handler): + def parallelSparseQuadGen(self, handler): """ Generates sparse quadrature points in parallel. 
@ In, handler, JobHandler, parallel processing tool @ Out, None """ numRunsNeeded=len(self.c) - j=-1 + j = -1 prefix = 'sparseTensor_' while True: finishedJobs = handler.getFinished(jobIdentifier=prefix) #FIXME this is by far the most expensive line in this method - #finishedJobs = handler.getFinished(prefix=prefix) #FIXME this is by far the most expensive line in this method for job in finishedJobs: if job.getReturnCode() == 0: new = job.getEvaluation() for i in range(len(new[0])): newpt = tuple(new[0][i]) - newwt = new[1][i]*float(str(job.identifier).replace(prefix, "")) + newwt = new[1][i] * float(str(job.identifier).replace(prefix, "")) if newpt in self.SG.keys(): - self.SG[newpt]+= newwt + self.SG[newpt] += newwt else: self.SG[newpt] = newwt else: - self.raiseAMessage('Sparse quad generation (tensor) '+job.identifier+' failed...') - if j different between SingleRun and PostProcessor runs - # if self.type == 'SingleRun': - # newInput = model.createNewInput(inputs,'None',**{'SampledVars':{},'additionalEdits':{}}) - # else: - # newInput = inputs # The single run should still collect its SampledVars for the output maybe? # The problem here is when we call Code.collectOutput(), the sampledVars @@ -187,35 +183,51 @@ def _localTakeAstepRun(self, inDictionary): # this should default to all of the ones in the input? Is it possible to # get an input field in the outputs variable that is not in the inputs # variable defined above? - DPM 4/6/2017 - # empty dictionary corresponds to sampling data in MultiRun - model.submit(inputs, None, jobHandler, **{'SampledVars': {'prefix':'None'}, 'additionalEdits': {}}) + # + # since we don't generate a RealizationBatch from the Sampler, we have to generate one here + batch = RealizationBatch(0) + rlz = batch[0] + rlz.inputInfo.update({ + 'prefix': 'None', + 'additionalEdits': {}, + }) + model.submit(batch, inputs, None, jobHandler) + # OLD model.submit(inputs, None, jobHandler, **{'SampledVars': {'prefix':'None'}, 'additionalEdits': {}}) + # FIXME make this match multirun, and maybe share the code? while True: finishedJobs = jobHandler.getFinished() - for finishedJob in finishedJobs: - if finishedJob.getReturnCode() == 0: - # if the return code is > 0 => means the system code crashed... we do not want to make the statistics poor => we discard this run - for output in outputs: - if not isinstance(output, OutStreamEntity): - model.collectOutput(finishedJob, output) - else: - output.addOutput() + for finishedJobObjs in finishedJobs: + # for batching, the finished job object is a list + if isinstance(finishedJobObjs, list): + finishedRunList = finishedJobObjs else: - self.raiseADebug(f'the job "{finishedJob.identifier}" has failed.') - if self.failureHandling['fail']: - #add run to a pool that can be sent to the sampler later - self.failedRuns.append(copy.copy(finishedJob)) + SHOULDNOTGETHERE + finishedRunList = [finishedJobObjs] + for finishedJob in finishedRunList: + if finishedJob.getReturnCode() == 0: + # if the return code is > 0 => means the system code crashed... 
we do not want to make the statistics poor => we discard this run + for output in outputs: + if not isinstance(output, OutStreamEntity): + model.collectOutput(finishedJob, output) + else: + output.addOutput() else: - if finishedJob.identifier not in self.failureHandling['jobRepetitionPerformed']: - self.failureHandling['jobRepetitionPerformed'][finishedJob.identifier] = 1 - if self.failureHandling['jobRepetitionPerformed'][finishedJob.identifier] <= self.failureHandling['repetitions']: - # we re-add the failed job - jobHandler.reAddJob(finishedJob) - self.raiseAWarning(f'As prescribed in the input, trying to re-submit the job "{finishedJob.identifier}". Trial {self.failureHandling["jobRepetitionPerformed"][finishedJob.identifier]}/{self.failureHandling["repetitions"]}') - self.failureHandling['jobRepetitionPerformed'][finishedJob.identifier] += 1 - else: + self.raiseADebug(f'the job "{finishedJob.identifier}" has failed.') + if self.failureHandling['fail']: #add run to a pool that can be sent to the sampler later self.failedRuns.append(copy.copy(finishedJob)) - self.raiseAWarning(f'The job "{finishedJob.identifier}" has been submitted {self.failureHandling["repetitions"]} times, failing every time!!!') + else: + if finishedJob.identifier not in self.failureHandling['jobRepetitionPerformed']: + self.failureHandling['jobRepetitionPerformed'][finishedJob.identifier] = 1 + if self.failureHandling['jobRepetitionPerformed'][finishedJob.identifier] <= self.failureHandling['repetitions']: + # we re-add the failed job + jobHandler.reAddJob(finishedJob) + self.raiseAWarning(f'As prescribed in the input, trying to re-submit the job "{finishedJob.identifier}". Trial {self.failureHandling["jobRepetitionPerformed"][finishedJob.identifier]}/{self.failureHandling["repetitions"]}') + self.failureHandling['jobRepetitionPerformed'][finishedJob.identifier] += 1 + else: + #add run to a pool that can be sent to the sampler later + self.failedRuns.append(copy.copy(finishedJob)) + self.raiseAWarning(f'The job "{finishedJob.identifier}" has been submitted {self.failureHandling["repetitions"]} times, failing every time!!!') if jobHandler.isFinished() and len(jobHandler.getFinishedNoPop()) == 0: break time.sleep(self.sleepTime) diff --git a/ravenframework/SupervisedLearning/FeatureSelection/RFE.py b/ravenframework/SupervisedLearning/FeatureSelection/RFE.py index 2edac6fc36..72e7b184f9 100644 --- a/ravenframework/SupervisedLearning/FeatureSelection/RFE.py +++ b/ravenframework/SupervisedLearning/FeatureSelection/RFE.py @@ -20,10 +20,10 @@ #External Modules-------------------------------------------------------------------------------- import copy import gc -import numpy as np import itertools import time from collections import defaultdict +import numpy as np from scipy.stats import spearmanr from scipy.cluster import hierarchy from scipy.spatial.distance import squareform @@ -323,13 +323,22 @@ def _train(self, X, y, featuresIds, targetsIds, maskF = None, maskT = None): groupRanking_ = copy.deepcopy(ranking_) groupSupportOfSupport_ = copy.deepcopy(supportOfSupport_) - supportDataRFE = {'featuresForRanking':featuresForRanking,'mask':mask,'nFeatures':nFeatures, - 'nTargets':nTargets,'nParams':nParams,'targetsIds':targetsIds, - 'originalParams':originalParams,'supportOfSupport_':supportOfSupport_, - 'ranking_':ranking_,'nFeaturesToSelect':nFeaturesToSelect,'firstStep':step, - 'setStep':setStep,'subGroups':self.subGroups, - 'originalSupport':support_, 'parametersToInclude':self.parametersToInclude, - 
'whichSpace':self.whichSpace} + supportDataRFE = {'featuresForRanking': featuresForRanking, + 'mask': mask, + 'nFeatures': nFeatures, + 'nTargets': nTargets, + 'nParams': nParams, + 'targetsIds': targetsIds, + 'originalParams': originalParams, + 'supportOfSupport_': supportOfSupport_, + 'ranking_': ranking_, + 'nFeaturesToSelect': nFeaturesToSelect, + 'firstStep': step, + 'setStep': setStep, + 'subGroups': self.subGroups, + 'originalSupport': support_, + 'parametersToInclude': self.parametersToInclude, + 'whichSpace': self.whichSpace} if useParallel: # send some data to workers @@ -350,7 +359,7 @@ def _train(self, X, y, featuresIds, targetsIds, maskF = None, maskT = None): prefix = f'subgroup_{g}' if g > 0: supportDataRFE['firstStep'] = setStep - jhandler.addJob((estimatorRef, XRef, yRef, g, outputSpace, supportDataRFE,), + jhandler.addSingleJob((estimatorRef, XRef, yRef, g, outputSpace, supportDataRFE,), self._rfe, prefix, uniqueHandler='RFE_subgroup') g += 1 @@ -477,7 +486,7 @@ def updateBestScore(it, k, score, combo, survivors): # train and get score if jhandler.availability() > 0: prefix = f'{k}_{it+1}' - jhandler.addJob((estimatorRef, XRef, yRef, combinations[it], supportDataRef,), + jhandler.addSingleJob((estimatorRef, XRef, yRef, combinations[it], supportDataRef,), self._scoring, prefix, uniqueHandler='RFE_scoring') it += 1 finishedJobs = jhandler.getFinished(uniqueHandler='RFE_scoring') From a4cffc71277ddcea8283be95dfd6221646b5a27a Mon Sep 17 00:00:00 2001 From: talbpw Date: Thu, 21 Nov 2024 15:29:01 -0700 Subject: [PATCH 10/18] EnsembleModel working --- doc/workshop/ExternalModels/projectile.py | 4 +- .../ensembleExample/1_ensemble_model.xml | 1 + ravenframework/JobHandler.py | 9 +- ravenframework/Models/EnsembleModel.py | 209 ++++++++---------- ravenframework/Models/Model.py | 16 +- ravenframework/Realizations/Realization.py | 32 +++ 6 files changed, 146 insertions(+), 125 deletions(-) diff --git a/doc/workshop/ExternalModels/projectile.py b/doc/workshop/ExternalModels/projectile.py index 1b8822ef72..bb8c47e0eb 100644 --- a/doc/workshop/ExternalModels/projectile.py +++ b/doc/workshop/ExternalModels/projectile.py @@ -83,12 +83,12 @@ def current_angle(v0, ang, vel): return np.arccos(v0 * np.cos(ang) / vel) def run(raven, inputs): - vars = {'x0': get_from_raven('x0', raven, 0), + inps = {'x0': get_from_raven('x0', raven, 0), 'y0': get_from_raven('y0', raven, 0), 'v0': get_from_raven('v0', raven, 1), 'angle': get_from_raven('angle', raven, 45), 'timeOption': get_from_raven('timeOption', raven, 0)} - res = main(vars) + res = main(inps) raven.x = res['x'] raven.y = res['y'] raven.t = res['t'] diff --git a/doc/workshop/ensembleModels/inputs/ensembleExample/1_ensemble_model.xml b/doc/workshop/ensembleModels/inputs/ensembleExample/1_ensemble_model.xml index 86ebde33bd..ba0451314c 100755 --- a/doc/workshop/ensembleModels/inputs/ensembleExample/1_ensemble_model.xml +++ b/doc/workshop/ensembleModels/inputs/ensembleExample/1_ensemble_model.xml @@ -52,6 +52,7 @@ 0 0 + 1 diff --git a/ravenframework/JobHandler.py b/ravenframework/JobHandler.py index 8efc44ed10..abf7a7d566 100644 --- a/ravenframework/JobHandler.py +++ b/ravenframework/JobHandler.py @@ -805,7 +805,8 @@ def reAddJob(self, runner): runner.trackTime('queue') self.__submittedJobs.append(runner.identifier) - def addClientJob(self, args, functionToRun, identifier, metadata=None, uniqueHandler="any", groupInfo = None): + def addClientJob(self, args, functionToRun, identifier, + metadata=None, uniqueHandler="any", groupInfo=None): """ 
Method to add an internal run (function execution), without consuming resources (free spots). This can be used for client handling (see @@ -831,9 +832,9 @@ def addClientJob(self, args, functionToRun, identifier, metadata=None, uniqueHan Consequentially the size is immutable @ Out, None """ - self.addJob(args, functionToRun, identifier, metadata, - forceUseThreads = True, uniqueHandler = uniqueHandler, - clientQueue = True, groupInfo = groupInfo) + self.addSingleJob(args, functionToRun, identifier, metadata, + forceUseThreads=True, uniqueHandler=uniqueHandler, + clientQueue=True, groupInfo=groupInfo) def addFinishedJob(self, data, metadata=None, uniqueHandler="any", profile=False): """ diff --git a/ravenframework/Models/EnsembleModel.py b/ravenframework/Models/EnsembleModel.py index 49b04d911c..63b889414c 100644 --- a/ravenframework/Models/EnsembleModel.py +++ b/ravenframework/Models/EnsembleModel.py @@ -15,27 +15,24 @@ EnsembleModel module, containing the class and methods to create a comunication 'pipeline' among different models in terms of Input/Output relation """ -#for future compatibility with Python 3-------------------------------------------------------------- -from __future__ import division, print_function, unicode_literals, absolute_import -#End compatibility block for Python 3---------------------------------------------------------------- - #External Modules---------------------------------------------------------------------------------- import io import sys import copy -import numpy as np import time import itertools from collections import OrderedDict -from ..Decorators.Parallelization import Parallel +import numpy as np #External Modules End-------------------------------------------------------------------------------- #Internal Modules------------------------------------------------------------------------------------ +from ..Decorators.Parallelization import Parallel from .Dummy import Dummy from ..utils import utils, InputData from ..utils.graphStructure import evaluateModelsOrder from ..Runners import Error as rerror from ..Runners.SharedMemoryRunner import InterruptibleThread +from ..Realizations import Realization #Internal Modules End-------------------------------------------------------------------------------- class EnsembleModel(Dummy): @@ -390,39 +387,21 @@ def getCurrentSetting(self): paramDict = self.getInitParams() return paramDict - def __selectInputSubset(self,modelName, kwargs): - """ - Method aimed to select the input subset for a certain model - @ In, modelName, string, the model name - @ In, kwargs , dict, the kwarded dictionary where the sampled vars are stored - @ Out, selectedkwargs , dict, the subset of variables (in a swallow copy of the kwargs dict) - """ - selectedkwargs = copy.copy(kwargs) - selectedkwargs['SampledVars'] = {} - selectedkwargs['SampledVarsPb'] = {} - for key in kwargs["SampledVars"].keys(): - if key in self.modelsDictionary[modelName]['Input']: - selectedkwargs['SampledVars'][key] = kwargs["SampledVars"][key] - selectedkwargs['SampledVarsPb'][key] = kwargs["SampledVarsPb"][key] if 'SampledVarsPb' in kwargs and key in kwargs["SampledVarsPb"] else 1. - return selectedkwargs - - def createNewInput(self,myInput,samplerType,**kwargs): + def createNewInput(self, myInput, samplerType, rlz): """ This function will return a new input to be submitted to the model, it is called by the sampler. 
@ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler or optimizer that is calling to generate a new input - @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} + @ In, rlz, Realization, Realization from whiech to build input @ Out, newInputs, dict, dict that returns the new inputs for each sub-model """ # check if all the inputs of the submodule are covered by the sampled vars and Outputs of the other sub-models if self.needToCheckInputs: - allCoveredVariables = list(set(itertools.chain(self.allOutputs,kwargs['SampledVars'].keys()))) - - identifier = kwargs['prefix'] - # global prefix - newKwargs = {'prefix':identifier} + allCoveredVariables = list(set(itertools.chain(self.allOutputs,rlz.keys()))) + identifier = rlz.inputInfo['prefix'] + # sub realizations + subRlzs = {'__setIdentifier': identifier} #TODO should this be a batch?? newInputs = {} ## First check the inputs if they need to be checked @@ -430,28 +409,23 @@ def createNewInput(self,myInput,samplerType,**kwargs): for modelIn, specs in self.modelsDictionary.items(): for inp in specs['Input']: if inp not in allCoveredVariables: - self.raiseAnError(RuntimeError,f"for sub-model {modelIn} the input {inp} has not been found among other models' outputs and sampled variables!") + self.raiseAnError(RuntimeError, f'for sub-model "{modelIn}" the input "{inp}" ' +\ + 'has not been found among other models\' outputs and sampled variables!') ## Now prepare the new inputs for each model for modelIn, specs in self.modelsDictionary.items(): - newKwargs[modelIn] = self.__selectInputSubset(modelIn,kwargs) - - # if specs['Instance'].type != 'Code': - # inputDict = [self._inputToInternal(self.modelsDictionary[modelIn]['InputObject'][0],newKwargs['SampledVars'].keys())] - # else: - # inputDict = self.modelsDictionary[modelIn]['InputObject'] + # FIXME this gets overwritten in _externalRun! + sub = rlz.createSubsetRlz(self.modelsDictionary[modelIn]['Input']) + subRlzs[modelIn] = sub # local prefix - newKwargs[modelIn]['prefix'] = modelIn+utils.returnIdSeparator()+identifier - newInputs[modelIn] = self.modelsDictionary[modelIn]['InputObject'] - - # if specs['Instance'].type == 'Code': - # newInputs[modelIn][1]['originalInput'] = inputDict + sub.inputInfo['prefix'] = modelIn + utils.returnIdSeparator() + identifier + newInputs[modelIn] = self.modelsDictionary[modelIn]['InputObject'] self.needToCheckInputs = False - return (newInputs, samplerType, newKwargs) + return (newInputs, samplerType, subRlzs) - def collectOutput(self,finishedJob,output): + def collectOutput(self, finishedJob, output): """ Method that collects the outputs from the previous run @ In, finishedJob, ClientRunner object, instance of the run just finished @@ -517,45 +491,41 @@ def getAdditionalInputEdits(self,inputInfo): self.modelsDictionary[modelIn]['Instance'].getAdditionalInputEdits(inputInfo) @Parallel() - def evaluateSample(self, myInput, samplerType, kwargs): + def evaluateSample(self, myInput, samplerType, rlz): """ This will evaluate an individual sample on this model. Note, parameters are needed by createNewInput and thus descriptions are copied from there. 
@ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} + @ In, subRlzs, dict(Realization), Realizations keyed by the models they belong to @ Out, returnValue, dict, This holds the output information of the evaluated sample. """ - kwargsToKeep = { keepKey: kwargs[keepKey] for keepKey in list(kwargs.keys())} - jobHandler = kwargs['jobHandler'] if self.parallelStrategy == 2 else None - Input = self.createNewInput(myInput[0], samplerType, **kwargsToKeep) - + # FIXME do I need to add inputInfo, or use rlz.asDict, for this? + kwargsToKeep = { keepKey: rlz[keepKey] for keepKey in list(rlz.keys())} + jobHandler = rlz.inputInfo['jobHandler'] if self.parallelStrategy == 2 else None + Input = self.createNewInput(myInput[0], samplerType, rlz) ## Unpack the specifics for this class, namely just the jobHandler - returnValue = (Input,self._externalRun(Input, jobHandler)) + returnValue = (Input, self._externalRun(Input, jobHandler)) return returnValue - def submit(self,myInput,samplerType,jobHandler,**kwargs): + def submit(self, batch, myInput, samplerType, jobHandler): """ This will submit an individual sample to be evaluated by this model to a specified jobHandler as a client job. Note, some parameters are needed by createNewInput and thus descriptions are copied from there. + @ In, batch, RealizationBatch, list of realizations to submit as jobs @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input @ In, jobHandler, JobHandler instance, the global job handler instance - @ In, **kwargs, dict, is a dictionary that contains the information - coming from the sampler, a mandatory key is the sampledVars' that - contains a dictionary {'name variable':value} @ Out, None """ - prefix = kwargs['prefix'] - ## Ensemble models need access to the job handler, so let's stuff it in our ## catch all kwargs where evaluateSample can pick it up, not great, but ## will suffice until we can better redesign this whole process. - kwargs['jobHandler'] = jobHandler if self.parallelStrategy == 2 else None + # TODO who are we setting this on? 
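+    # editor note: in this rewrite the handler reference computed on the next line is
+    # attached per-Realization in the loop below (info['jobHandler'] = jh) and, for
+    # parallel strategy 2, later replaced with self.localJobHandler.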
+ jh = jobHandler if self.parallelStrategy == 2 else None ## This may look a little weird, but due to how the parallel python library ## works, we are unable to pass a member function as a job because the ## pp library loses track of what self is, so instead we call it from the @@ -567,39 +537,42 @@ def submit(self,myInput,samplerType,jobHandler,**kwargs): self.localPollingThread = InterruptibleThread(target=self.localJobHandler.startLoop) self.localPollingThread.daemon = True self.localPollingThread.start() - nRuns = 1 - batchMode = kwargs.get("batchMode", False) - if batchMode: - nRuns = kwargs["batchInfo"]['nRuns'] - - for index in range(nRuns): - if batchMode: - kw = kwargs['batchInfo']['batchRealizations'][index] - kw['batchRun'] = index + 1 - else: - kw = kwargs - prefix = kw.get("prefix") - uniqueHandler = kw.get("uniqueHandler",'any') - forceThreads = kw.get("forceThreads",False) + for r, rlz in enumerate(batch): + # OLD # + # if batchMode: + # kw = kwargs['batchInfo']['batchRealizations'][index] + # kw['batchRun'] = index + 1 + # else: + # kw = kwargs + info = rlz.inputInfo + prefix = info.get("prefix") + info['jobHandler'] = jh # NOTE gets overwritten below for parallel strat 2 - metadata = kw + uniqueHandler = info.get("uniqueHandler", 'any') + forceThreads = info.get("forceThreads", False) - if self.parallelStrategy == 1: - jobHandler.addJob((self, myInput, samplerType, kw), self.__class__.evaluateSample, prefix, metadata=metadata, - uniqueHandler=uniqueHandler, forceUseThreads=forceThreads, - groupInfo={'id': kwargs['batchInfo']['batchId'], 'size': nRuns} if batchMode else None) - else: + metadata = info + + if self.parallelStrategy == 2: # for parallel strategy 2, the ensemble model works as a step => it needs the jobHandler - kw['jobHandler'] = jobHandler - kw['jobHandler'] = self.localJobHandler - # for parallel strategy 2, we need to make sure that the batchMode is set to False in the inner runs since only the + info['jobHandler'] = self.localJobHandler + # make sure that the batchMode is set to False in the inner runs since only the # ensemble model evaluation should be batched (THIS IS REQUIRED because the CODE does not submit runs like the other models) - kw['batchMode'] = False - jobHandler.addClientJob((self, myInput, samplerType, kw), self.__class__.evaluateSample, prefix, metadata=metadata, - uniqueHandler=uniqueHandler, - groupInfo={'id': kwargs['batchInfo']['batchId'], 'size': nRuns} if batchMode else None) - + # TODO FIXME how does this work now with batches? + # kw['batchMode'] = False + jobHandler.addClientJob( + (self, myInput, samplerType, rlz), + self.__class__.evaluateSample, + prefix, + metadata=metadata, + uniqueHandler=uniqueHandler, + forceUseThreads=forceThreads, + groupInfo={'id': batch.ID, 'size': len(batch)}) + # else: submit as batch after loop + if self.parallelStrategy == 1: + jobHandler.addJobBatch(batch, self, myInput, samplerType, self.__class__.evaluateSample) + # else: submitted client-style within loop above def __retrieveDependentOutput(self,modelIn,listOfOutputs, typeOutputs): """ @@ -622,11 +595,13 @@ def __retrieveDependentOutput(self,modelIn,listOfOutputs, typeOutputs): dependentOutputs['_indexMap'][inKey] = indices return dependentOutputs - def _externalRun(self,inRun, jobHandler = None):#, jobHandler): + def _externalRun(self, inRun, jobHandler=None):#, jobHandler): """ Method that performs the actual run of the ensemble model (separated from run method for parallelization purposes) - @ In, inRun, tuple, tuple of Inputs, e.g. 
inRun[0]: actual dictionary of input, inRun[1]: string, - the type of Sampler or Optimizer, inRun[2], dict, contains the information from the Sampler + @ In, inRun, tuple, tuple of Inputs, e.g. + - inRun[0]: original model inputs (e.g., files), + - inRun[1]: sampler type used (string), + - inRun[2], subRlzs, dictionary of Realizations corresponding to inputs for each submodel @ In, jobHandler, object, optional, instance of jobHandler (available if parallelStrategy==2) @ Out, returnEvaluation, tuple, the results of the assembled model: - returnEvaluation[0] dict of results from each sub-model, @@ -635,8 +610,10 @@ def _externalRun(self,inRun, jobHandler = None):#, jobHandler): """ originalInput = inRun[0] samplerType = inRun[1] - inputKwargs = inRun[2] - identifier = inputKwargs.pop('prefix') + subRlzs = inRun[2] # OLD inputRlz = inRun[2] + # OLD inputInfo = inputRlz.inputInfo + # OLD identifier = inputInfo.pop('prefix') + identifier = subRlzs['__setIdentifier'] tempOutputs = {} inRunTargetEvaluations = {} @@ -646,8 +623,8 @@ def _externalRun(self,inRun, jobHandler = None):#, jobHandler): # deepcopy assures distinct copies inRunTargetEvaluations[modelIn] = copy.deepcopy(self.localTargetEvaluations[modelIn]) residueContainer = dict.fromkeys(self.modelsDictionary.keys()) - gotOutputs = [{}]*len(self.orderList) - typeOutputs = ['']*len(self.orderList) + gotOutputs = [{}]*len(self.orderList) + typeOutputs = ['']*len(self.orderList) # if nonlinear system, initialize residue container if self.activatePicard: @@ -668,6 +645,8 @@ def _externalRun(self,inRun, jobHandler = None):#, jobHandler): self.raiseAMessage("Picard's Iteration "+ str(iterationCount)) for modelCnt, modelIn in enumerate(self.orderList): + inputRlz = subRlzs[modelIn] + inputInfo = inputRlz.inputInfo # clear the model's Target Evaluation data object # in case there are metadataToTransfer, let's collect them from the source metadataToTransfer = None @@ -688,7 +667,7 @@ def _externalRun(self,inRun, jobHandler = None):#, jobHandler): dependentOutput = self.__retrieveDependentOutput(modelIn, gotOutputs, typeOutputs) # if nonlinear system, check for initial coditions if iterationCount == 1 and self.activatePicard: - sampledVars = inputKwargs[modelIn]['SampledVars'].keys() + sampledVars = inputRlz.keys() # OLD inputKwargs[modelIn]['SampledVars'].keys() conditionsToCheck = set(self.modelsDictionary[modelIn]['Input']) - set(itertools.chain(dependentOutput.keys(),sampledVars)) for initialConditionToSet in conditionsToCheck: if initialConditionToSet in self.initialConditions.keys(): @@ -697,29 +676,37 @@ def _externalRun(self,inRun, jobHandler = None):#, jobHandler): self.raiseAnError(IOError,"No initial conditions provided for variable "+ initialConditionToSet) # set new identifiers suffix = '' - if 'batchRun' in inputKwargs[modelIn]: - suffix = f"{utils.returnIdSeparator()}{inputKwargs[modelIn]['batchRun']}" - inputKwargs[modelIn]['prefix'] = f"{modelIn}{utils.returnIdSeparator()}{identifier}{suffix}" - inputKwargs[modelIn]['uniqueHandler'] = f"{self.name}{identifier}{suffix}" + # TODO how do I need to modify this for new batch run? + # if 'batchRun' in inputKwargs[modelIn]: + # suffix = f"{utils.returnIdSeparator()}{inputKwargs[modelIn]['batchRun']}" + # FIXME this was already set in the createNewInput method! + # -> the Suffix is added. Should this be something the Batch takes care of? 
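+          # editor note: createNewInput already set this prefix to
+          # "<modelIn><separator><identifier>", so only the (currently empty)
+          # batch-run suffix is appended here.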
+ inputInfo['prefix'] += f"{suffix}" + # OLD inputInfo['prefix'] = f"{modelIn}{utils.returnIdSeparator()}{identifier}{suffix}" + inputInfo['uniqueHandler'] = f"{self.name}{identifier}{suffix}" if metadataToTransfer is not None: - inputKwargs[modelIn]['metadataToTransfer'] = metadataToTransfer + inputInfo['metadataToTransfer'] = metadataToTransfer - for key, value in dependentOutput.items(): - inputKwargs[modelIn]["SampledVars" ][key] = dependentOutput[key] + for var in dependentOutput: + #inputInfo[modelIn]["SampledVars" ][key] = dependentOutput[key] ## FIXME it is a mistake (Andrea). The SampledVarsPb for this variable should be transferred from outside ## Who has this information? -- DPM 4/11/17 - inputKwargs[modelIn]["SampledVarsPb"][key] = 1. - self._replaceVariablesNamesWithAliasSystem(inputKwargs[modelIn]["SampledVars" ],'input',False) - self._replaceVariablesNamesWithAliasSystem(inputKwargs[modelIn]["SampledVarsPb"],'input',False) + inputInfo["SampledVarsPb"][var] = 1. + self._replaceVariablesNamesWithAliasSystem(inputRlz, 'input', False) + self._replaceVariablesNamesWithAliasSystem(inputInfo["SampledVarsPb"], 'input', False) ## FIXME: this will come after we rework the "runInfo" collection in the code ## if run info is present, we need to pass to to kwargs ##if self.runInfoDict and 'Code' == self.modelsDictionary[modelIn]['Instance'].type: ## inputKwargs[modelIn].update(self.runInfoDict) - retDict, gotOuts, evaluation = self.__advanceModel(identifier, self.modelsDictionary[modelIn], - originalInput[modelIn], inputKwargs[modelIn], - inRunTargetEvaluations[modelIn], samplerType, - iterationCount, jobHandler) + retDict, gotOuts, evaluation = self.__advanceModel(identifier, + self.modelsDictionary[modelIn], + originalInput[modelIn], + inputRlz, + inRunTargetEvaluations[modelIn], + samplerType, + iterationCount, + jobHandler) returnDict[modelIn] = retDict typeOutputs[modelCnt] = inRunTargetEvaluations[modelIn].type @@ -739,7 +726,7 @@ def _externalRun(self,inRun, jobHandler = None):#, jobHandler): np.asarray(residueContainer[modelIn]['iterValues'][1][out])) residueContainer[modelIn]['Norm'] = np.linalg.norm(np.asarray(list(residueContainer[modelIn]['iterValues'][1].values()))- np.asarray(list(residueContainer[modelIn]['iterValues'][0].values()))) - + # END [for modelCnt, modelIn] loop # if nonlinear system, check the total residue and convergence if self.activatePicard: iterZero = [] @@ -751,7 +738,7 @@ def _externalRun(self,inRun, jobHandler = None):#, jobHandler): self.raiseAMessage("Picard's Iteration Norm: "+ str(residueContainer['TotalResidue'])) residualPass = residueContainer['TotalResidue'] <= self.convergenceTol # sometimes there can be multiple residual values - if hasattr(residualPass,'__len__'): + if hasattr(residualPass, '__len__'): residualPass = all(residualPass) if residualPass: self.raiseAMessage("Picard's Iteration converged. Norm: "+ str(residueContainer['TotalResidue'])) diff --git a/ravenframework/Models/Model.py b/ravenframework/Models/Model.py index 9fbeeec389..20213e7721 100644 --- a/ravenframework/Models/Model.py +++ b/ravenframework/Models/Model.py @@ -421,14 +421,14 @@ def createNewInput(self, myInput, samplerType, rlz): def submit(self, batch, myInput, samplerType, jobHandler): """ - This will submit an individual sample to be evaluated by this model to a - specified jobHandler. Note, some parameters are needed by createNewInput - and thus descriptions are copied from there. 
- @ In, batch, RealizationBatch, list of realizations to submit as jobs - @ In, myInput, list, the inputs (list) to start from to generate the new one - @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, jobHandler, JobHandler instance, the global job handler instance - @ Out, None + This will submit an individual sample to be evaluated by this model to a + specified jobHandler. Note, some parameters are needed by createNewInput + and thus descriptions are copied from there. + @ In, batch, RealizationBatch, list of realizations to submit as jobs + @ In, myInput, list, the inputs (list) to start from to generate the new one + @ In, samplerType, string, is the type of sampler that is calling to generate a new input + @ In, jobHandler, JobHandler instance, the global job handler instance + @ Out, None """ jobHandler.addJobBatch(batch, self, myInput, samplerType, self.__class__.evaluateSample) ### OLD ### diff --git a/ravenframework/Realizations/Realization.py b/ravenframework/Realizations/Realization.py index 9abad71246..de5d2c7a09 100644 --- a/ravenframework/Realizations/Realization.py +++ b/ravenframework/Realizations/Realization.py @@ -63,6 +63,38 @@ def asDict(self): info.update(dict((label, np.atleast_1d(val)) for label, val in self.labels.items())) return info + def createSubsetRlz(self, targetVars, ignoreMissing=True): + """ + Creates a realization, retaining the data in this realization but with only a subset + of variables. Ignores any targetVars that aren't part of this rlz. + @ In, targetVars, list(str), list of variable names to retain + @ In, ignoreMissing, bool, if True then don't error if some entries missing + @ Out, new, Realization, new realization instance + """ + new = Realization() + varKeyedEntries = [] + oneVar = next(iter(self._values)) + for key, entry in self.inputInfo.items(): + # assuming the only entries relevant to variables are first-layer dicts in inputInfo ... + if isinstance(entry, dict) and oneVar in entry: + new[key] = {} + varKeyedEntries.append(key) + # TODO other exceptions to handle? + else: + new.inputInfo[key] = entry + # fill values from this rlz into the new one + for tvar in targetVars: + #if tvar in self._values: + new[tvar] = self._values[tvar] + for key in varKeyedEntries: + if key in self.inputInfo[key]: + new.inputInfo[key][tvar] = self.inputInfo[key][tvar] + # elif not ignoreMissing: + # raise KeyError(f'Desired variable "{tvar}" missing from source Realization!') + return new + + + ######## # # dict-like members From 358bbfb9a9a2ff42cfb300933c3f6db6087ad24c Mon Sep 17 00:00:00 2001 From: talbpw Date: Mon, 25 Nov 2024 14:06:10 -0700 Subject: [PATCH 11/18] WIP stash --- ravenframework/Models/EnsembleModel.py | 2 +- .../Models/HybridModels/HybridModel.py | 142 +++++++++--------- .../Models/HybridModels/HybridModelBase.py | 112 ++++++-------- 3 files changed, 124 insertions(+), 132 deletions(-) diff --git a/ravenframework/Models/EnsembleModel.py b/ravenframework/Models/EnsembleModel.py index 63b889414c..97eda13244 100644 --- a/ravenframework/Models/EnsembleModel.py +++ b/ravenframework/Models/EnsembleModel.py @@ -497,7 +497,7 @@ def evaluateSample(self, myInput, samplerType, rlz): are needed by createNewInput and thus descriptions are copied from there. 
@ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, subRlzs, dict(Realization), Realizations keyed by the models they belong to + @ In, rlz, Realization, Realization to evaluate @ Out, returnValue, dict, This holds the output information of the evaluated sample. """ # FIXME do I need to add inputInfo, or use rlz.asDict, for this? diff --git a/ravenframework/Models/HybridModels/HybridModel.py b/ravenframework/Models/HybridModels/HybridModel.py index 3acdde7203..f72954fa64 100644 --- a/ravenframework/Models/HybridModels/HybridModel.py +++ b/ravenframework/Models/HybridModels/HybridModel.py @@ -116,7 +116,7 @@ def __init__(self): self.addAssemblerObject('ROM', InputData.Quantity.one_to_infinity) self.addAssemblerObject('TargetEvaluation', InputData.Quantity.one) - def localInputAndChecks(self,xmlNode): + def localInputAndChecks(self, xmlNode): """ Function to read the portion of the xml input that belongs to this specialized class and initialize some stuff based on the inputs got @@ -149,7 +149,7 @@ def localInputAndChecks(self,xmlNode): if name != 'CrowdingDistance': self.raiseAnError(IOError, "Validation method ", name, " is not implemented yet!") - def initialize(self,runInfo,inputs,initDict=None): + def initialize(self, runInfo, inputs, initDict=None): """ Method to initialize this model class @ In, runInfo, dict, is the run info from the jobHandler @@ -224,7 +224,7 @@ def getInitParams(self): tempDict['ROMs contained in HybridModel are '] = self.romsDictionary.keys() return tempDict - def getAdditionalInputEdits(self,inputInfo): + def getAdditionalInputEdits(self, inputInfo): """ Collects additional edits for the sampler to use when creating a new input. 
In this case, it calls all the getAdditionalInputEdits methods of the sub-models @@ -233,43 +233,47 @@ def getAdditionalInputEdits(self,inputInfo): """ HybridModelBase.getAdditionalInputEdits(self,inputInfo) - def __selectInputSubset(self,romName, kwargs): - """ - Method aimed to select the input subset for a certain model - @ In, romName, string, the rom name - @ In, kwargs , dict, the kwarded dictionary where the sampled vars are stored - @ Out, selectedKwargs , dict, the subset of variables (in a swallow copy of the kwargs dict) - """ - selectedKwargs = copy.copy(kwargs) - selectedKwargs['SampledVars'], selectedKwargs['SampledVarsPb'] = {}, {} - featsList = self.romsDictionary[romName]['Instance'].getInitParams()['Features'] - selectedKwargs['SampledVars'] = {key: kwargs['SampledVars'][key] for key in featsList} - if 'SampledVarsPb' in kwargs.keys(): - selectedKwargs['SampledVarsPb'] = {key: kwargs['SampledVarsPb'][key] for key in featsList} - else: - selectedKwargs['SampledVarsPb'] = {key: 1.0 for key in featsList} - return selectedKwargs + # OLD # + # def __selectInputSubset(self, romName, kwargs): + # """ + # Method aimed to select the input subset for a certain model + # @ In, romName, string, the rom name + # @ In, kwargs , dict, the kwarded dictionary where the sampled vars are stored + # @ Out, selectedKwargs , dict, the subset of variables (in a swallow copy of the kwargs dict) + # """ + # selectedKwargs = copy.copy(kwargs) + # selectedKwargs['SampledVars'], selectedKwargs['SampledVarsPb'] = {}, {} + # featsList = self.romsDictionary[romName]['Instance'].getInitParams()['Features'] + # selectedKwargs['SampledVars'] = {key: kwargs['SampledVars'][key] for key in featsList} + # if 'SampledVarsPb' in kwargs.keys(): + # selectedKwargs['SampledVarsPb'] = {key: kwargs['SampledVarsPb'][key] for key in featsList} + # else: + # selectedKwargs['SampledVarsPb'] = {key: 1.0 for key in featsList} + # return selectedKwargs - def createNewInput(self,myInput,samplerType,**kwargs): + def createNewInput(self, myInput, samplerType, rlz): """ This function will return a new input to be submitted to the model, it is called by the sampler. 
@ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} + @ In, rlz, Realization, Realization to evaluate @ Out, newInputs, dict, dict that returns the new inputs for each sub-model """ self.raiseADebug("Create New Input") - useROM = kwargs['useROM'] + info = rlz.inputInfo + useROM = info['useROM'] if useROM: - identifier = kwargs['prefix'] - newKwargs = {'prefix':identifier, 'useROM':useROM} + identifier = info['prefix'] + subRlzs = {} + # OLD newKwargs = {'prefix':identifier, 'useROM':useROM} for romName in self.romsDictionary.keys(): - newKwargs[romName] = self.__selectInputSubset(romName, kwargs) - newKwargs[romName]['prefix'] = romName+utils.returnIdSeparator()+identifier - newKwargs[romName]['uniqueHandler'] = self.name+identifier + featsList = self.romsDictionary[romName]['Instance'].getInitParams()['Features'] + subRlz = rlz.createSubsetRlz(featsList) + subRlzs[romName] = subRlz + subRlz.inputInfo['prefix'] = romName+utils.returnIdSeparator()+identifier + subRlz.inputInfo['uniqueHandler'] = self.name+identifier else: - newKwargs = copy.deepcopy(kwargs) + subRlzs = rlz # this feels implicit and strange if self.modelInstance.type == 'Code': codeInput = [] romInput = [] @@ -281,17 +285,17 @@ def createNewInput(self,myInput,samplerType,**kwargs): else: self.raiseAnError(IOError, "The type of input ", elem.name, " can not be accepted!") if useROM: - return (romInput, samplerType, newKwargs) + return (romInput, samplerType, subRlzs) else: - return (codeInput, samplerType, newKwargs) - return (myInput, samplerType, newKwargs) + return (codeInput, samplerType, subRlzs) + else: + return (myInput, samplerType, subRlzs) - def trainRom(self, samplerType, kwargs): + def trainRom(self): """ This function will train all ROMs if they are not converged @ In, samplerType, string, the type of sampler - @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} + @ In, rlz, Realization, point in sample space to evaluate @ Out, None """ self.raiseADebug("Start to train roms") @@ -374,29 +378,27 @@ def checkRomConvergence(self): self.raiseADebug("All ROMs are converged") return converged - def checkRomValidity(self, kwargs): + def checkRomValidity(self, rlz): """ This function will check the validity of all roms - @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} + @ In, rlz, Realization, point in sample space to evaluate @ Out, None """ allValid = False for selectionMethod, params in self.validationMethod.items(): if selectionMethod == 'CrowdingDistance': - allValid = self.__crowdingDistanceMethod(params, kwargs['SampledVars']) + allValid = self.__crowdingDistanceMethod(params, rlz) else: self.raiseAnError(IOError, "Unknown model selection method ", selectionMethod, " is given!") if allValid: self.raiseADebug("ROMs are all valid for given model ", self.modelInstance.name) return allValid - def __crowdingDistanceMethod(self, settingDict, varDict): + def __crowdingDistanceMethod(self, settingDict, rlz): """ This function will check the validity of all roms based on the 
crowding distance method @ In, settingDict, dict, stores the setting information for the crowding distance method - @ In, varDict, dict, is a dictionary that contains the information coming from the sampler, - i.e. {'name variable':value} + @ In, rlz, Realization, point in sample space to evaluate @ Out, allValid, bool, True if the given sampled point is valid for all roms, otherwise False """ allValid = True @@ -405,7 +407,7 @@ def __crowdingDistanceMethod(self, settingDict, varDict): # generate the data for input parameters paramsList = romInfo['Instance'].getInitParams()['Features'] trainInput = self._extractInputs(romInfo['Instance'].trainingSet, paramsList) - currentInput = self._extractInputs(varDict, paramsList) + currentInput = self._extractInputs(rlz, paramsList) if self.__crowdingDistance is None or self.__crowdingDistance.size != trainInput.shape[1]: #XXX Note that if self.__crowdingDistance.size != trainInput.shape[1] # occurs, this is technically a bug. @@ -472,11 +474,12 @@ def amIReadyToTrainROM(self): ready = True return ready - def submit(self,myInput,samplerType,jobHandler,**kwargs): + def submit(self, batch, myInput, samplerType, jobHandler): """ This will submit an individual sample to be evaluated by this model to a specified jobHandler as a client job. Note, some parameters are needed by createNewInput and thus descriptions are copied from there. + @ In, batch, RealizationBatch, list of realizations to submit as jobs @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to @@ -487,38 +490,43 @@ def submit(self,myInput,samplerType,jobHandler,**kwargs): contains a dictionary {'name variable':value} @ Out, None """ - prefix = kwargs['prefix'] - self.tempOutputs['uncollectedJobIds'].append(prefix) - if self.amIReadyToTrainROM(): - self.trainRom(samplerType, kwargs) - self.romConverged = self.checkRomConvergence() - if self.romConverged: - self.romValid = self.checkRomValidity(kwargs) - else: - self.romValid = False - if self.romValid: - self.modelIndicator[prefix] = 1 - else: - self.modelIndicator[prefix] = 0 - kwargs['useROM'] = self.romValid - self.raiseADebug(f"Submit job with job identifier: {kwargs['prefix']}, Running ROM: {self.romValid} ") - HybridModelBase.submit(self,myInput,samplerType,jobHandler,**kwargs) + # TODO does this ever receive a batch longer than 1? 
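+    # editor sketch (hypothetical): until the TODO above is resolved, a cheap guard
+    # could flag the unexpected case, e.g.:
+    #   if len(batch) > 1:
+    #     self.raiseAWarning(f'HybridModel received a batch of {len(batch)} realizations; '
+    #                        'only size-1 batches have been exercised here.')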
+ for r, rlz in enumerate(batch): + prefix = rlz['prefix'] + self.tempOutputs['uncollectedJobIds'].append(prefix) + if self.amIReadyToTrainROM(): + self.trainRom() + self.romConverged = self.checkRomConvergence() + if self.romConverged: + self.romValid = self.checkRomValidity(rlz) + else: + self.romValid = False + if self.romValid: + self.modelIndicator[prefix] = 1 + else: + self.modelIndicator[prefix] = 0 + rlz.inputInfo['useROM'] = self.romValid + self.raiseADebug(f"Submit job with job identifier: {prefix}, Running ROM: {self.romValid} ") + HybridModelBase.submit(self, batch, myInput, samplerType, jobHandler) - def _externalRun(self,inRun, jobHandler): + def _externalRun(self, inRun, jobHandler): """ Method that performs the actual run of the hybrid model (separated from run method for parallelization purposes) - @ In, inRun, tuple, tuple of Inputs (inRun[0] actual input, inRun[1] type of sampler, - inRun[2] dictionary that contains information coming from sampler) + @ In, inRun, tuple, tuple of Inputs: + - inRun[0] actual input, + - inRun[1] type of sampler, + - inRun[2] realization(s) coming from sampler @ In, jobHandler, instance, instance of jobHandler @ Out, exportDict, dict, dict of results from this hybrid model """ self.raiseADebug("External Run") originalInput = inRun[0] samplerType = inRun[1] - inputKwargs = inRun[2] - identifier = inputKwargs.pop('prefix') + rlzs = inRun[2] # OLD was inputKwargs + FIXMEWORKINGHERE + # identifier = inputKwargs.pop('prefix') useROM = inputKwargs.pop('useROM') - uniqueHandler = self.name + identifier + # uniqueHandler = self.name + identifier if useROM: # run roms exportDict = {} @@ -583,7 +591,7 @@ def _externalRun(self,inRun, jobHandler): exportDict['useROM'] = useROM return exportDict - def collectOutput(self,finishedJob,output): + def collectOutput(self, finishedJob, output): """ Method that collects the outputs from the previous run @ In, finishedJob, ClientRunner object, instance of the run just finished diff --git a/ravenframework/Models/HybridModels/HybridModelBase.py b/ravenframework/Models/HybridModels/HybridModelBase.py index c341fbbfee..a68580da6c 100644 --- a/ravenframework/Models/HybridModels/HybridModelBase.py +++ b/ravenframework/Models/HybridModels/HybridModelBase.py @@ -78,7 +78,7 @@ def __init__(self): # assembler objects to be requested self.addAssemblerObject('Model', InputData.Quantity.one_to_infinity) - def localInputAndChecks(self,xmlNode): + def localInputAndChecks(self, xmlNode): """ Function to read the portion of the xml input that belongs to this specialized class and initialize some stuff based on the inputs got @@ -94,7 +94,7 @@ def localInputAndChecks(self,xmlNode): self.createWorkingDir = True self._isThereACode = True # there is a code - def initialize(self,runInfo,inputs,initDict=None): + def initialize(self, runInfo, inputs, initDict=None): """ Method to initialize this model class @ In, runInfo, dict, is the run info from the jobHandler @@ -123,7 +123,7 @@ def getInitParams(self): tempDict = OrderedDict() return tempDict - def getAdditionalInputEdits(self,inputInfo): + def getAdditionalInputEdits(self, inputInfo): """ Collects additional edits for the sampler to use when creating a new input. 
In this case, it calls all the getAdditionalInputEdits methods of the sub-models @@ -134,90 +134,77 @@ def getAdditionalInputEdits(self,inputInfo): modelInstance.getAdditionalInputEdits(inputInfo) @abc.abstractmethod - def createNewInput(self,myInput,samplerType,**kwargs): + def createNewInput(self, rlz, myInput, samplerType): """ This function will return a new input to be submitted to the model, it is called by the sampler. + @ In, rlz, Realization, Realization to evaluate @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} @ Out, newInputs, dict, dict that returns the new inputs for each sub-model """ - def submit(self,myInput,samplerType,jobHandler,**kwargs): + def submit(self, batch, myInput, samplerType, jobHandler): """ This will submit an individual sample to be evaluated by this model to a specified jobHandler as a client job. Note, some parameters are needed by createNewInput and thus descriptions are copied from there. + @ In, batch, RealizationBatch, list of realizations to submit as jobs @ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input @ In, jobHandler, JobHandler instance, the global job handler instance - @ In, **kwargs, dict, is a dictionary that contains the information - coming from the sampler, a mandatory key is the sampledVars' that - contains a dictionary {'name variable':value} @ Out, None """ ## Hybrid models need access to the job handler, so let's stuff it in our ## catch all kwargs where evaluateSample can pick it up, not great, but ## will suffice until we can better redesign this whole process. - nRuns = 1 - batchMode = kwargs.get("batchMode", False) - if batchMode: - nRuns = kwargs["batchInfo"]['nRuns'] + for rlz in batch: + info = rlz.inputInfo + info['jobHandler'] = jobHandler - for index in range(nRuns): - if batchMode: - kw = kwargs['batchInfo']['batchRealizations'][index] - kw['batchMode'] = False - else: - kw = kwargs - - kw['jobHandler'] = jobHandler - - prefix = kw.get("prefix") - uniqueHandler = kw.get("uniqueHandler",'any') - ## These kw are updated by createNewInput, so the job either should not - ## have access to the metadata, or it needs to be updated from within the - ## evaluateSample function, which currently is not possible since that - ## function does not know about the job instance. - metadata = kw + prefix = info.get("prefix") + uniqueHandler = info.get("uniqueHandler",'any') - jobHandler.addClientJob((self, myInput, samplerType, kw), self.__class__.evaluateSample, prefix, metadata=metadata, - uniqueHandler=uniqueHandler, - groupInfo={'id': kwargs['batchInfo']['batchId'], 'size': nRuns} if batchMode else None) + # TODO submit as batch? + jobHandler.addClientJob( + (self, myInput, samplerType, rlz), + self.__class__.evaluateSample, + prefix, + metadata=info, + uniqueHandler=uniqueHandler, + groupInfo={'id': batch.ID, 'size': len(batch)}) @Parallel() - def evaluateSample(self, myInput, samplerType, kwargs): + def evaluateSample(self, myInput, samplerType, rlz): """ This will evaluate an individual sample on this model. Note, parameters are needed by createNewInput and thus descriptions are copied from there. 
@ In, myInput, list, the inputs (list) to start from to generate the new one @ In, samplerType, string, is the type of sampler that is calling to generate a new input - @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler, - a mandatory key is the sampledVars'that contains a dictionary {'name variable':value} - @ Out, rlz, dict, This holds the output information of the evaluated sample. + @ In, rlz, Realization, Realization to evaluate + @ Out, out, dict, This holds the output information of the evaluated sample. """ self.raiseADebug("Evaluate Sample") - kwargsKeys = list(kwargs.keys()) - kwargsKeys.pop(kwargsKeys.index("jobHandler")) - kwargsToKeep = {keepKey: kwargs[keepKey] for keepKey in kwargsKeys} - jobHandler = kwargs['jobHandler'] - newInput = self.createNewInput(myInput, samplerType, **kwargsToKeep) + excludeKeys = ['jobHandler'] + kwargsKeys = list(x for x in rlz.inputInfo.keys() if x not in excludeKeys) + # FIXME what all needs to go in this? rlz.asDict? + # OLD kwargsToKeep = {keepKey: kwargs[keepKey] for keepKey in kwargsKeys} + jobHandler = rlz.inputInfo['jobHandler'] + newInput = self.createNewInput(myInput, samplerType, rlz) ## Unpack the specifics for this class, namely just the jobHandler - result = self._externalRun(newInput,jobHandler) - # assure rlz has all metadata - rlz = dict((var,np.atleast_1d(kwargsToKeep[var])) for var in kwargsToKeep.keys()) - # update rlz with input space from inRun and output space from result - rlz.update(dict((var,np.atleast_1d(kwargsToKeep['SampledVars'][var] - if var in kwargs['SampledVars'] else result[var])) - for var in set(itertools.chain(result.keys(),kwargsToKeep['SampledVars'].keys())))) - return rlz + result = self._externalRun(newInput, jobHandler) + # assure out has all metadata + out = dict((var,np.atleast_1d(val)) for var, val in rlz.inputInfo.items() if var not in excludeKeys) + # update out with input space from inRun and output space from result + # TODO there's gotta be an easier way to say this ... 
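+    # editor sketch (hypothetical): an equivalent but simpler form, assuming rlz
+    # supports .items() like a dict (as it is used elsewhere in this patch), would be:
+    #   out.update({var: np.atleast_1d(val) for var, val in result.items()})
+    #   out.update({var: np.atleast_1d(val) for var, val in rlz.items()})
+    # (applying rlz last keeps sampled inputs taking precedence over model results)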
+ out.update(dict((var, np.atleast_1d(rlz[var] if var in rlz else result[var])) + for var in set(itertools.chain(result.keys(),rlz.keys())))) + return out @abc.abstractmethod - def _externalRun(self,inRun, jobHandler): + def _externalRun(self, inRun, jobHandler): """ Method that performs the actual run of the essembled model (separated from run method for parallelization purposes) @ In, inRun, tuple, tuple of Inputs (inRun[0] actual input, inRun[1] type of sampler, @@ -226,7 +213,7 @@ def _externalRun(self,inRun, jobHandler): @ Out, exportDict, dict, dict of results from this hybrid model """ - def collectOutput(self,finishedJob,output): + def collectOutput(self, finishedJob, output): """ Method that collects the outputs from the previous run @ In, finishedJob, ClientRunner object, instance of the run just finished @@ -235,25 +222,22 @@ def collectOutput(self,finishedJob,output): """ Dummy.collectOutput(self, finishedJob, output) - def _extractInputs(self,dataIn, paramsList): + def _extractInputs(self, dataIn, rlz): """ - Extract the the parameters in the paramsList from the given data object dataIn + Extract the the parameters in the realization from the given data object dataIn @ dataIn, Instance or Dict, data object or dictionary contains the input and output parameters - @ paramsList, List, List of parameter names + @ In, rlz, Realization, point in sample space to evaluate @ localInput, numpy.array, array contains the values of selected input and output parameters """ localInput = [] - if type(dataIn) == dict: - for elem in paramsList: - if elem in dataIn.keys(): - localInput.append(np.atleast_1d(dataIn[elem])) - else: - self.raiseAnError(IOError, "Parameter ", elem, " is not found!") - else: - self.raiseAnError(IOError, "The input type '", inputType, "' can not be accepted!") + for elem in rlz: + if elem in dataIn.keys(): + localInput.append(np.atleast_1d(dataIn[elem])) + else: + self.raiseAnError(IOError, "Parameter ", elem, " is not found!") return np.asarray(localInput) - def _mergeDict(self,exportDict, tempExportDict): + def _mergeDict(self, exportDict, tempExportDict): """ This function will combine two dicts into one @ In, exportDict, dict, dictionary stores the input, output and metadata From b1a456587a3ca14ca4a5952329331d4efbec70c8 Mon Sep 17 00:00:00 2001 From: talbpw Date: Tue, 17 Dec 2024 15:51:39 -0700 Subject: [PATCH 12/18] got clean run of GradientDescent, needs testing --- ravenframework/DataObjects/DataSet.py | 16 +- ravenframework/Models/ExternalModel.py | 7 +- .../Models/HybridModels/HybridModel.py | 176 ++++++++++-------- .../Optimizers/BayesianOptimizer.py | 15 +- ravenframework/Optimizers/GeneticAlgorithm.py | 11 +- ravenframework/Optimizers/GradientDescent.py | 27 ++- ravenframework/Optimizers/Optimizer.py | 7 +- ravenframework/Optimizers/RavenSampled.py | 131 +++++++------ ravenframework/Samplers/AdaptiveSampler.py | 6 - ravenframework/Samplers/Grid.py | 9 +- ravenframework/Samplers/Sampler.py | 95 +++++++--- ravenframework/Steps/MultiRun.py | 1 - 12 files changed, 262 insertions(+), 239 deletions(-) diff --git a/ravenframework/DataObjects/DataSet.py b/ravenframework/DataObjects/DataSet.py index 4c48ddca87..8d349a42a6 100644 --- a/ravenframework/DataObjects/DataSet.py +++ b/ravenframework/DataObjects/DataSet.py @@ -638,16 +638,16 @@ def realization(self, index=None, matchDict=None, noMatchDict=None, tol=1e-15, u rlzs = rlz if type(rlz).__name__ == "list" else [rlz] rlzs = [self._addIndexMapToRlz(rl) for rl in rlzs] dims = self.getDimensions() - print('*'*80) - 
print('DEBUGG whoami:', self.name) - print('DEBUGG dims:', dims) + # print('*'*80) + # print('DEBUGG whoami:', self.name) + # print('DEBUGG dims:', dims) for index, rl in enumerate(rlzs): d = {k:{'dims':tuple(dims[k]) ,'data': v} for (k,v) in rl.items() if k not in ['_indexMap']} - print('*'*80) - print('DEBUGG d:') - for k, v in d.items(): - print(k, v) - print('*'*80) + # print('*'*80) + # print('DEBUGG d:') + # for k, v in d.items(): + # print(k, v) + # print('*'*80) rlz[index] = xr.Dataset.from_dict(d) if len(rlzs) > 1: # concatenate just in case there are multiple realizations diff --git a/ravenframework/Models/ExternalModel.py b/ravenframework/Models/ExternalModel.py index bab609570f..ea20d32e6e 100644 --- a/ravenframework/Models/ExternalModel.py +++ b/ravenframework/Models/ExternalModel.py @@ -336,14 +336,9 @@ def evaluateSample(self, myInput, samplerType, rlz): res.update(dict((var, np.atleast_1d(val)) for var, val in rlz.items())) if '_indexMap' in res: res['_indexMap'].update(evalIndexMap) - print('DEBUGG what is here?') - print('DEBUGG indexMap:', res.get('_indexMap', None)) - print('DEBUGG res:') - for k, v in res.items(): - print('DEBUGG ... ', k, v) return res - def collectOutput(self,finishedJob,output,options=None): + def collectOutput(self, finishedJob, output, options=None): """ Method that collects the outputs from the previous run @ In, finishedJob, InternalRunner object, instance of the run just finished diff --git a/ravenframework/Models/HybridModels/HybridModel.py b/ravenframework/Models/HybridModels/HybridModel.py index f72954fa64..f7f333aaaa 100644 --- a/ravenframework/Models/HybridModels/HybridModel.py +++ b/ravenframework/Models/HybridModels/HybridModel.py @@ -233,24 +233,6 @@ def getAdditionalInputEdits(self, inputInfo): """ HybridModelBase.getAdditionalInputEdits(self,inputInfo) - # OLD # - # def __selectInputSubset(self, romName, kwargs): - # """ - # Method aimed to select the input subset for a certain model - # @ In, romName, string, the rom name - # @ In, kwargs , dict, the kwarded dictionary where the sampled vars are stored - # @ Out, selectedKwargs , dict, the subset of variables (in a swallow copy of the kwargs dict) - # """ - # selectedKwargs = copy.copy(kwargs) - # selectedKwargs['SampledVars'], selectedKwargs['SampledVarsPb'] = {}, {} - # featsList = self.romsDictionary[romName]['Instance'].getInitParams()['Features'] - # selectedKwargs['SampledVars'] = {key: kwargs['SampledVars'][key] for key in featsList} - # if 'SampledVarsPb' in kwargs.keys(): - # selectedKwargs['SampledVarsPb'] = {key: kwargs['SampledVarsPb'][key] for key in featsList} - # else: - # selectedKwargs['SampledVarsPb'] = {key: 1.0 for key in featsList} - # return selectedKwargs - def createNewInput(self, myInput, samplerType, rlz): """ This function will return a new input to be submitted to the model, it is called by the sampler. @@ -520,75 +502,107 @@ def _externalRun(self, inRun, jobHandler): @ Out, exportDict, dict, dict of results from this hybrid model """ self.raiseADebug("External Run") + subRlzs = inRun[2] # OLD was inputKwargs, comes from createNewInput + oneRlz = next(iter(subRlzs)) + identifier = oneRlz.inputInfo['prefix'] # FIXME should be batch ID, not sample ID + # TODO attach this to the batch, instead of the single realizations? + useROM = oneRlz.inputInfo['useROM'] # TODO need pop? inputKwargs.pop('useROM') + uniqueHandler = self.name + identifier + if useROM: + exportDict = self._runROMs(inRun, jobHandler, identifier, uniqueHandler) + else: # useCode, I guess? 
+ exportDict = self._runCode(inRun, jobHandler, identifier, uniqueHandler) + # used in the collectOutput + exportDict['useROM'] = useROM + return exportDict + + def _runROMs(self, inRun, jobHandler, identifier, uniqueHandler): + """ + Submit ROMs for evaluation, and collect results + @ In, inRun, tuple, run input data (see _externalRun) + @ In, jobHandler, JobHandler, entity to whom jobs are submitted + @ In, identifier, str, identifier for this set of job submissions + @ In, uniqueHandler, str, tag for finding jobs submitted by this request + @ Out, exportDict, dict, results of runs + """ originalInput = inRun[0] samplerType = inRun[1] - rlzs = inRun[2] # OLD was inputKwargs - FIXMEWORKINGHERE - # identifier = inputKwargs.pop('prefix') - useROM = inputKwargs.pop('useROM') - # uniqueHandler = self.name + identifier - if useROM: - # run roms - exportDict = {} - self.raiseADebug("Switch to ROMs") - # submit all the roms - for romName, romInfo in self.romsDictionary.items(): - inputKwargs[romName]['prefix'] = romName+utils.returnIdSeparator()+identifier - nextRom = False - while not nextRom: - if jobHandler.availability() > 0: - with self.__busyDictLock: - busySet = romInfo['Busy'] - romInfo['Instance'].submit(originalInput, samplerType, jobHandler, **inputKwargs[romName]) - busySet.add(inputKwargs[romName]['prefix']) - self.__busyDict[inputKwargs[romName]['prefix']] = romName - self.raiseADebug("Job ", romName, " with identifier ", identifier, " is submitted, busySet", busySet) - nextRom = True - else: - time.sleep(self.sleepTime) - # collect the outputs from the runs of ROMs - while True: - finishedJobs = jobHandler.getFinished(uniqueHandler=uniqueHandler) - for finishedRun in finishedJobs: - with self.__busyDictLock: - jobRom = self.__busyDict[finishedRun.identifier] - self.raiseADebug("collect job with identifier ", identifier, ' internal identifier ',finishedRun.identifier, ' rom ', jobRom, ' busy ', self.romsDictionary[jobRom]['Busy']) - self.romsDictionary[jobRom]['Busy'].remove(finishedRun.identifier) - evaluation = finishedRun.getEvaluation() - if isinstance(evaluation, rerror): - self.raiseAnError(RuntimeError, "The job identified by "+finishedRun.identifier+" failed!") - # collect output in temporary data object - tempExportDict = evaluation - exportDict = self._mergeDict(exportDict, tempExportDict) - if jobHandler.areTheseJobsFinished(uniqueHandler=uniqueHandler): - self.raiseADebug("Jobs with uniqueHandler ", uniqueHandler, "are collected!") - break - time.sleep(self.sleepTime) - exportDict['prefix'] = identifier - else: - # run model - inputKwargs['prefix'] = self.modelInstance.name+utils.returnIdSeparator()+identifier - inputKwargs['uniqueHandler'] = self.name + identifier - moveOn = False - while not moveOn: + subRlzs = inRun[2] + exportDict = {} + self.raiseADebug("Switch to ROMs") + # submit all the roms + for romName, romInfo in self.romsDictionary.items(): + rlz = subRlzs[romName] + info = rlz.inputInfo + rlz['prefix'] = romName+utils.returnIdSeparator()+identifier + nextRom = False + while not nextRom: + # FIXME why is jobHandler not handling its own availability? if jobHandler.availability() > 0: - self.modelInstance.submit(originalInput, samplerType, jobHandler, **inputKwargs) - self.raiseADebug("Job submitted for model ", self.modelInstance.name, " with identifier ", identifier) - moveOn = True + with self.__busyDictLock: + busySet = romInfo['Busy'] + # FIXME submit is expecting a batch as the first member, submit the batch? 
+ romInfo['Instance'].submit(rlz, originalInput, samplerType, jobHandler) + busySet.add(info['prefix']) + self.__busyDict[info['prefix']] = romName + self.raiseADebug("Job ", romName, " with identifier ", identifier, " is submitted, busySet", busySet) + nextRom = True else: time.sleep(self.sleepTime) - while not jobHandler.isThisJobFinished(self.modelInstance.name+utils.returnIdSeparator()+identifier): + exportDict['prefix'] = identifier + # collect the outputs from the runs of ROMs + while True: + finishedJobs = jobHandler.getFinished(uniqueHandler=uniqueHandler) + for finishedRun in finishedJobs: + with self.__busyDictLock: + jobRom = self.__busyDict[finishedRun.identifier] + self.raiseADebug("collect job with identifier ", identifier, ' internal identifier ',finishedRun.identifier, ' rom ', jobRom, ' busy ', self.romsDictionary[jobRom]['Busy']) + self.romsDictionary[jobRom]['Busy'].remove(finishedRun.identifier) + evaluation = finishedRun.getEvaluation() + if isinstance(evaluation, rerror): + self.raiseAnError(RuntimeError, "The job identified by "+finishedRun.identifier+" failed!") + # collect output in temporary data object + tempExportDict = evaluation + exportDict = self._mergeDict(exportDict, tempExportDict) + if jobHandler.areTheseJobsFinished(uniqueHandler=uniqueHandler): + self.raiseADebug("Jobs with uniqueHandler ", uniqueHandler, "are collected!") + break + time.sleep(self.sleepTime) + exportDict['prefix'] = identifier + return exportDict + + def _runCode(self, inRun, jobHandler, identifier, uniqueHandler): + """ + Submit code for evaluation, and collect results + @ In, inRun, tuple, run input data (see _externalRun) + @ In, jobHandler, JobHandler, entity to whom jobs are submitted + @ In, identifier, str, identifier for this set of job submissions + @ In, uniqueHandler, str, tag for finding jobs submitted by this request + @ Out, exportDict, dict, results of runs + """ + originalInput = inRun[0] + samplerType = inRun[1] + rlz = inRun[2] # this is the result of the weird implicitness from createNewInput + # run model + rlz.inputInfo['prefix'] = self.modelInstance.name+utils.returnIdSeparator()+identifier + rlz.inputInfo['uniqueHandler'] = self.name + identifier + moveOn = False + while not moveOn: + if jobHandler.availability() > 0: + self.modelInstance.submit(rlz, originalInput, samplerType, jobHandler) + self.raiseADebug("Job submitted for model ", self.modelInstance.name, " with identifier ", identifier) + moveOn = True + else: time.sleep(self.sleepTime) - self.raiseADebug("Job finished ", self.modelInstance.name, " with identifier ", identifier) - finishedRun = jobHandler.getFinished(jobIdentifier = inputKwargs['prefix'], uniqueHandler = uniqueHandler) - evaluation = finishedRun[0].getEvaluation() - if isinstance(evaluation, rerror): - self.raiseAnError(RuntimeError, "The model "+self.modelInstance.name+" identified by "+finishedRun[0].identifier+" failed!") - # collect output in temporary data object - exportDict = evaluation - self.raiseADebug("Create exportDict") - # used in the collectOutput - exportDict['useROM'] = useROM + while not jobHandler.isThisJobFinished(self.modelInstance.name+utils.returnIdSeparator()+identifier): + time.sleep(self.sleepTime) + self.raiseADebug("Job finished ", self.modelInstance.name, " with identifier ", identifier) + finishedRun = jobHandler.getFinished(jobIdentifier=rlz.inputInfo['prefix'], uniqueHandler=uniqueHandler) + evaluation = finishedRun[0].getEvaluation() + if isinstance(evaluation, rerror): + self.raiseAnError(RuntimeError, 
f'The model "{self.modelInstance.name}" identified by "{finishedRun[0].identifier}" failed!') + # collect output in temporary data object + exportDict = evaluation return exportDict def collectOutput(self, finishedJob, output): diff --git a/ravenframework/Optimizers/BayesianOptimizer.py b/ravenframework/Optimizers/BayesianOptimizer.py index 6c72384d2c..948a49abc8 100644 --- a/ravenframework/Optimizers/BayesianOptimizer.py +++ b/ravenframework/Optimizers/BayesianOptimizer.py @@ -16,18 +16,15 @@ auth: Anthoney Griffith (@grifaa) date: May, 2023 """ -#External Modules------------------------------------------------------------------------------------ import copy + import numpy as np from pyDOE3 import lhs import scipy.optimize as sciopt -#External Modules End-------------------------------------------------------------------------------- -#Internal Modules------------------------------------------------------------------------------------ -from ..utils import InputData, InputTypes, mathUtils +from ..utils import InputData, InputTypes from .RavenSampled import RavenSampled from .acquisitionFunctions import factory as acqFactory -#Internal Modules End-------------------------------------------------------------------------------- class BayesianOptimizer(RavenSampled): @@ -210,8 +207,8 @@ def initialize(self, externalSeeding=None, solutionExport=None): # FIXME currently BO assumes only one optimization 'trajectory' RavenSampled.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport) self._convergenceInfo = {0:{'persistence':0, 'converged':False}} - meta = ['batchId'] - self.addMetaKeys(meta) + # OLD should be handled elsewhere meta = ['batchId'] + # self.addMetaKeys(meta) self._initialSampleSize = len(self._initialValues) self.batch = self._initialSampleSize @@ -227,8 +224,8 @@ def initialize(self, externalSeeding=None, solutionExport=None): self.raiseAnError(RuntimeError, f'Invalid model type was provided: {self._model.subType}. Bayesian Optimizer' f'currently only accepts the following: {["GaussianProcessRegressor"]}') elif self._model.supervisedContainer[0].multioutputWrapper: - self.raiseAnError(RuntimeError, f'When using GPR ROM for Bayesian Optimization, must set ' - f'node to False') + self.raiseAnError(RuntimeError, 'When using GPR ROM for Bayesian Optimization, must set ' + + 'node to False') elif len(self._model.supervisedContainer[0].target) != 1: self.raiseAnError(RuntimeError, f'Only one target allowed when using GPR ROM for Bayesian Optimizer! 
' f'Received {len(self._model.supervisedContainer[0].target)}') diff --git a/ravenframework/Optimizers/GeneticAlgorithm.py b/ravenframework/Optimizers/GeneticAlgorithm.py index 2308868c87..555c56cc7f 100644 --- a/ravenframework/Optimizers/GeneticAlgorithm.py +++ b/ravenframework/Optimizers/GeneticAlgorithm.py @@ -72,7 +72,7 @@ def __init__(self): self._convergenceInfo = {} # by traj, the persistence and convergence information for most recent opt self._requiredPersistence = 0 # consecutive persistence required to mark convergence self.needDenormalized() # the default in all optimizers is to normalize the data which is not the case here - self.batchId = 0 + # self.batchId = 0 self.population = None # panda Dataset container containing the population at the beginning of each generation iteration self.popAge = None # population age self.fitness = None # population fitness @@ -395,8 +395,9 @@ def initialize(self, externalSeeding=None, solutionExport=None): """ RavenSampled.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport) - meta = ['batchId'] - self.addMetaKeys(meta) + # TODO remove, moved to Sampler.initialize + # meta = ['batchId'] + # self.addMetaKeys(meta) self.batch = self._populationSize if self._populationSize != len(self._initialValues): self.raiseAnError(IOError, f'Number of initial values provided for each variable is {len(self._initialValues)}, while the population size is {self._populationSize}') @@ -431,11 +432,11 @@ def needDenormalized(self): # Run Methods # ############### - def _useRealization(self, info, rlz): + def _useRealizationBatch(self, meta, rlz): """ Used to feedback the collected runs into actionable items within the sampler. This is called by localFinalizeActualSampling, and hence should contain the main skeleton. - @ In, info, dict, identifying information about the realization + @ In, meta, dict, job information from the collected realizations @ In, rlz, xr.Dataset, new batched realizations @ Out, None """ diff --git a/ravenframework/Optimizers/GradientDescent.py b/ravenframework/Optimizers/GradientDescent.py index 452f579f4e..1a3161d23b 100644 --- a/ravenframework/Optimizers/GradientDescent.py +++ b/ravenframework/Optimizers/GradientDescent.py @@ -17,18 +17,11 @@ Created 2020-01 @author: talbpaul """ -#for future compatibility with Python 3-------------------------------------------------------------- -from __future__ import division, print_function, unicode_literals, absolute_import -#End compatibility block for Python 3---------------------------------------------------------------- - -#External Modules------------------------------------------------------------------------------------ import copy +from pprint import pprint from collections import deque, defaultdict import numpy as np -#External Modules End-------------------------------------------------------------------------------- - -#Internal Modules------------------------------------------------------------------------------------ from ..utils import InputData, InputTypes, mathUtils from .RavenSampled import RavenSampled @@ -38,8 +31,6 @@ from .stepManipulators import NoConstraintResolutionFound, NoMoreStepsNeeded -#Internal Modules End-------------------------------------------------------------------------------- - class GradientDescent(RavenSampled): """ Base class for Sampled Optimizers using gradient descent optimization methods. @@ -172,8 +163,10 @@ def getSolutionExportVariableNames(cls): """ # cannot be determined before run-time due to variables and prefixes. 
ok = super(GradientDescent, cls).getSolutionExportVariableNames() - new = {'stepSize': 'the size of step taken in the normalized input space to arrive at each optimal point'} - new['conv_{CONV}'] = 'status of each given convergence criteria' + new = { + 'stepSize': 'the size of step taken in the normalized input space to arrive at each optimal point', + 'conv_{CONV}': 'status of each given convergence criteria', + } # TODO need to include StepManipulators and GradientApproximators solution export entries as well! # -> but really should only include active ones, not all of them. This seems like it should work # when the InputData can scan forward to determine which entities are actually used. @@ -290,6 +283,8 @@ def initialize(self, externalSeeding=None, solutionExport=None): self._gradientInstance.initialize(self.toBeSampled) self._stepInstance.initialize(self.toBeSampled, persistence=self._requiredPersistence) self._acceptInstance.initialize() + # set the batch size + self.batch = 1 + self._gradientInstance.numGradPoints() # if single trajectory, turn off follower termination if len(self._initialValues) < 2: self.raiseADebug('Setting terminateFollowers to False since only 1 trajectory exists.') @@ -333,8 +328,8 @@ def checkConvergence(self, traj, new, old): def _useRealization(self, info, rlz): """ Used to feedback the collected runs into actionable items within the sampler. - @ In, info, dict, identifying information about the realization - @ In, rlz, dict, realized realization + @ In, meta, dict, meta information from job run + @ In, data, xr.Dataset, batch of realization data (not actual RealizationBatch) @ Out, None """ traj = info['traj'] @@ -737,12 +732,16 @@ def _rejectOptPoint(self, traj, info, old): @ Out, none """ # cancel grad runs + # FIXME temp disable, can we actually cancel these still in batching? self._cancelAssociatedJobs(info['traj'], step=info['step']) # what do do if a point is rejected? # for now, rerun the opt point and gradients, AND cut step # TODO user option to EITHER rerun opt point OR cut step! # initialize a new step self._initializeStep(traj) + # update prefix, batch IDs? or is that something that should happen ... somewhere else? + + # FIXME do we also need to register these differently? # track that the next recommended step size for this traj should be "cut" self._stepRecommendations[traj] = 'shrink' # get new grads around new point diff --git a/ravenframework/Optimizers/Optimizer.py b/ravenframework/Optimizers/Optimizer.py index 8561ccac82..e521ca0ce8 100644 --- a/ravenframework/Optimizers/Optimizer.py +++ b/ravenframework/Optimizers/Optimizer.py @@ -18,16 +18,12 @@ Reworked 2020-01 @author: talbpaul """ -# External Modules---------------------------------------------------------------------------------- import copy import abc import numpy as np -# External Modules End------------------------------------------------------------------------------ -# Internal Modules---------------------------------------------------------------------------------- from ..utils import randomUtils, InputData, InputTypes from ..Samplers import AdaptiveSampler, Sampler -# Internal Modules End------------------------------------------------------------------------------ class Optimizer(AdaptiveSampler): """ @@ -293,7 +289,10 @@ def initialize(self, externalSeeding=None, solutionExport=None): for entry in self.assemblerDict.get('ImplicitConstraint', []): self._impConstraintFunctions.append(entry[3]) + + # FIXME can this superclass call be at the top of this method? 
AdaptiveSampler.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport) + # sampler self._initializeInitSampler(externalSeeding) # seed diff --git a/ravenframework/Optimizers/RavenSampled.py b/ravenframework/Optimizers/RavenSampled.py index 9c5d19ea5b..e737597f90 100644 --- a/ravenframework/Optimizers/RavenSampled.py +++ b/ravenframework/Optimizers/RavenSampled.py @@ -17,21 +17,14 @@ Created 2020-01 @author: talbpaul """ -# for future compatibility with Python 3------------------------------------------------------------ -from __future__ import division, print_function, unicode_literals, absolute_import -# End compatibility block for Python 3-------------------------------------------------------------- - -# External Modules---------------------------------------------------------------------------------- import abc from collections import deque import copy + import numpy as np -# External Modules End------------------------------------------------------------------------------ -# Internal Modules---------------------------------------------------------------------------------- -from ..utils import InputData, InputTypes +from ..utils import InputData, InputTypes, utils from .Optimizer import Optimizer -# Internal Modules End------------------------------------------------------------------------------ class RavenSampled(Optimizer): @@ -112,10 +105,9 @@ def __init__(self): Optimizer.__init__(self) # Instance Variable Initialization # public - self.limit = None # max samples self.type = 'Sampled Optimizer' # type - self.batch = 1 # batch size: 1 means no batching (default) - self.batchId = 0 # Id of each batch of evaluations + self.batch = 0 # batch size: 0 means no batching (default) + self.limits['samples'] = None # requires a user input, so override base class definition # _protected self._writeSteps = 'final' # when steps should be written self._submissionQueue = deque() # TODO change to Queue.Queue if multithreading samples @@ -142,13 +134,13 @@ def handleInput(self, paramInput): # limit limit = init.findFirst('limit') if limit is not None: - self.limit = limit.value + self.limits['samples'] = limit.value # writeSteps writeSteps = init.findFirst('writeSteps') if writeSteps is not None: self._writeSteps = writeSteps.value # additional checks - if self.limit is None: + if self.limits['samples'] is None: self.raiseAnError(IOError, 'A is required for any RavenSampled Optimizer!') def _checkNDVariables(self): @@ -168,8 +160,9 @@ def initialize(self, externalSeeding=None, solutionExport=None): @ Out, None """ Optimizer.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport) - self.batch = 1 - self.batchId = 0 + # TODO remove + # self.batch = 1 + # self.batchId = 0 ############### # Run Methods # @@ -221,41 +214,39 @@ def amIreadyToProvideAnInput(self): ready = Optimizer.amIreadyToProvideAnInput(self) # we're not ready yet if we don't have anything in queue ready = ready and len(self._submissionQueue) != 0 - + ready = ready and self.counters['samples'] + self.batch < self.limits['samples'] return ready - def localGenerateInput(self, model, inp): + def localGenerateInput(self, batch, model, modelInput): """ Provides the next sample to take. After this method is called, the self.inputInfo should be ready to be sent to the model + @ In, batch, RealizationBatch, mapping to populate with sample values @ In, model, model instance, an instance of a model - @ In, inp, list, a list of the original needed inputs for the model (e.g. list of files, etc.) 
+ @ In, modelInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) @ Out, None """ - if self.batch > 1: - self.inputInfo['batchMode'] = True - batchData = [] - self.batchId += 1 - else: - self.inputInfo['batchMode'] = False - for _ in range(self.batch): - inputInfo = {'SampledVarsPb':{}, 'batchMode':self.inputInfo['batchMode']} # ,'prefix': str(self.batchId)+'_'+str(i) - if self.counter == self.limit + 1: - break + # would a new batch exceed the limit of samples? + # TODO shouldn't this check be in the Ready check? + if self.counters['samples'] + self.batch > self.limits['samples'] + 1: + raise utils.NoMoreSamplesNeeded + return None # TODO raise an error? something? + for rlz in batch: + inputInfo = rlz.inputInfo # get point from stack + # FIXME for now uses the popping queue, but this should be shifted to letting the + # Sampler fill the batch instead. The queue should store batches instead, maybe? point, info = self._submissionQueue.popleft() point = self.denormalizeData(point) # assign a tracking prefix - # prefix = inputInfo['prefix'] - prefix = self.inputInfo['prefix'] - inputInfo['prefix'] = prefix - # register the point tracking information - self._registerSample(prefix, info) - # build the point in the way the Sampler expects + # TODO update with batching? + prefix = inputInfo['prefix'] + self._registerSample(prefix, info) # TODO still needed after batching rework? + # build the point for var in self.toBeSampled: # , val in point.items(): val = point[var] if isinstance(point[var], float) else np.atleast_1d(point[var].data)[0] - self.values[var] = val # TODO should be np.atleast_1d? + rlz[var] = val ptProb = self.distDict[var].pdf(val) # sampler-required meta information # TODO should we not require this? inputInfo[f'ProbabilityWeight-{var}'] = ptProb @@ -263,16 +254,7 @@ def localGenerateInput(self, model, inp): inputInfo['ProbabilityWeight'] = 1 # TODO assume all weight 1? Not well-distributed samples inputInfo['PointProbability'] = np.prod([x for x in inputInfo['SampledVarsPb'].values()]) inputInfo['SamplerType'] = self.type - if self.inputInfo['batchMode']: - inputInfo['SampledVars'] = self.values - inputInfo['batchId'] = self.batchId - batchData.append(copy.deepcopy(inputInfo)) - else: - inputInfo['SampledVars'] = self.values - inputInfo['batchId'] = self.batchId - self.inputInfo.update(inputInfo) - if self.batch > 1: - self.inputInfo['batchInfo'] = {'nRuns': self.batch, 'batchRealizations': batchData, 'batchId': str('gen_' + str(self.batchId))} + return batch # @profile def localFinalizeActualSampling(self, jobObject, model, myInput): @@ -286,35 +268,52 @@ def localFinalizeActualSampling(self, jobObject, model, myInput): Optimizer.localFinalizeActualSampling(self, jobObject, model, myInput) # TODO should this be an Optimizer class action instead of Sampled? 
# collect finished job - prefix = jobObject.getMetadata()['prefix'] + meta = jobObject.getMetadata() + # OLD prefix = meta['prefix'] + batchID = meta['batchID'] # If we're not looking for the prefix, don't bother with using it # this usually happens if we've cancelled the run but it's already done - if not self.stillLookingForPrefix(prefix): - return # FIXME implicit constraints probable should be handled here too + if self._targetEvaluation.isEmpty: + self.raiseAnError(RuntimeError, f'Expected to find batch "{batchID}" in TargetEvaluation "{self._targetEvaluation.name}", but it is empty!') # get information and realization, and update trajectories - info = self.getIdentifierFromPrefix(prefix, pop=True) - if self.batch == 1: - _, rlz = self._targetEvaluation.realization(matchDict={'prefix': prefix}, asDataSet=False) + # OLD info = self.getIdentifierFromPrefix(prefix, pop=True) + if self.batch == 0: # FIXME should never be true + DEPRECATE + # _, rlz = self._targetEvaluation.realization(matchDict={'prefix': prefix}, asDataSet=False) + # if rlz is None: + # self.raiseAnError(RuntimeError, f'Expected to find entry with prefix "{prefix}" in TargetEvaluation! Found: {self._targetEvaluation.getVarValues("prefix")}') else: # NOTE if here, then rlz is actually a xr.Dataset, NOT a dictionary!! - _, rlz = self._targetEvaluation.realization(matchDict={'batchId': self.batchId}, asDataSet=True, first=False) + _, data = self._targetEvaluation.realization(matchDict={'batchID': batchID}, asDataSet=False, first=False) + if data is None: + self.raiseAnError(RuntimeError, f'Expected to find batch with ID "{batchID}" in TargetEvaluation! Found: {self._targetEvaluation.getVarValues("batchID")}') + # NOTE in this case "rlz" is ACTUALLY a xr.Dataset with multiple realizations in it! # _, full = self._targetEvaluation.realization(matchDict={'prefix': prefix}, asDataSet=False) - if self._targetEvaluation.isEmpty: - self.raiseAnError(RuntimeError, f'Expected to find entry with prefix "{prefix}" in TargetEvaluation "{self._targetEvaluation.name}", but it is empty!') - _, full = self._targetEvaluation.realization(matchDict={'prefix': prefix}) - if full is None: - self.raiseAnError(RuntimeError, f'Expected to find entry with prefix "{prefix}" in TargetEvaluation! Found: {self._targetEvaluation.getVarValues("prefix")}') + # _, full = self._targetEvaluation.realization(matchDict={'prefix': prefix}) + # if full is None: + # self.raiseAnError(RuntimeError, f'Expected to find entry with prefix "{prefix}" in TargetEvaluation! Found: {self._targetEvaluation.getVarValues("prefix")}') # trim down opt point to the useful parts # TODO making a new dict might be costly, maybe worth just passing whole point? # # testing suggests no big deal on smaller problem - # the sign of the objective function is flipped in case we do maximization - # so get the correct-signed value into the realization - if self._minMax == 'max': - rlz[self._objectiveVar] *= -1 - # TODO FIXME let normalizeData work on an xr.DataSet (batch) not just a dictionary! - rlz = self.normalizeData(rlz) - self._useRealization(info, rlz) + # TODO FIXME receive as a dataset instead of dicts? Might be faster. Might be a lot faster. + self.raiseADebug('Processing new batch results ...') + for r, rlz in enumerate(data): + prefix = rlz['prefix'] + if not self.stillLookingForPrefix(prefix): + # should we be skipping all of them if any aren't being looked for? + # This usually (only?) 
happens if an opt is rejected and associated runs are cancelled + # TODO turn into stillLookingForBatch? + continue + info = self.getIdentifierFromPrefix(prefix) + self.raiseADebug(f'... processing results of batch "{rlz["batchID"]}" prefix "{prefix}" ...') + self.raiseADebug('... -> prefix tags:', info) + # the sign of the objective function is flipped in case we do maximization + # so get the correct-signed value into the realization + if self._minMax == 'max': + rlz[self._objectiveVar] *= -1 + rlz = self.normalizeData(rlz) + self._useRealization(info, rlz) def finalizeSampler(self, failedRuns): """ @@ -643,7 +642,7 @@ def _updateSolutionExport(self, traj, rlz, acceptable, rejectReason): 'trajID': traj, 'accepted': acceptable, 'rejectReason': rejectReason, - 'modelRuns': self.counter + 'modelRuns': self.counters['samples'] }) # optimal point input and output spaces objValue = rlz[self._objectiveVar] diff --git a/ravenframework/Samplers/AdaptiveSampler.py b/ravenframework/Samplers/AdaptiveSampler.py index 4555dcaa11..dce9dcd3d4 100644 --- a/ravenframework/Samplers/AdaptiveSampler.py +++ b/ravenframework/Samplers/AdaptiveSampler.py @@ -18,13 +18,7 @@ @author: alfoa supercedes Samplers.py from alfoa (2/16/2013) """ -# for future compatibility with Python 3------------------------------------------------------------ -from __future__ import division, print_function, unicode_literals, absolute_import -# End compatibility block for Python 3-------------------------------------------------------------- - -# Internal Modules from ..utils import utils, mathUtils, InputData, InputTypes - from .Sampler import Sampler diff --git a/ravenframework/Samplers/Grid.py b/ravenframework/Samplers/Grid.py index 29a00c5f7c..aa3d90caf2 100644 --- a/ravenframework/Samplers/Grid.py +++ b/ravenframework/Samplers/Grid.py @@ -18,24 +18,17 @@ @author: alfoa supercedes Samplers.py from alfoa """ -# for future compatibility with Python 3------------------------------------------------------------ -from __future__ import division, print_function, unicode_literals, absolute_import -# End compatibility block for Python 3-------------------------------------------------------------- - -# External Modules---------------------------------------------------------------------------------- import sys import copy from operator import mul from functools import reduce + import numpy as np -# External Modules End------------------------------------------------------------------------------ -# Internal Modules---------------------------------------------------------------------------------- from .Sampler import Sampler from ..utils import utils from ..utils import InputData, InputTypes from .. import GridEntities -# Internal Modules End------------------------------------------------------------------------------ class Grid(Sampler): """ diff --git a/ravenframework/Samplers/Sampler.py b/ravenframework/Samplers/Sampler.py index 1253a2895c..b26f10b3cc 100644 --- a/ravenframework/Samplers/Sampler.py +++ b/ravenframework/Samplers/Sampler.py @@ -18,12 +18,13 @@ """ import sys -import copy import abc import json import itertools -import numpy as np from collections import namedtuple + +import numpy as np + from ..BaseClasses.InputDataUser import InputDataUser from ..utils import utils,randomUtils,InputData, InputTypes @@ -196,6 +197,21 @@ class cls. return inputSpecification + @classmethod + def getSolutionExportVariableNames(cls): + """ + Compiles a list of acceptable SolutionExport variable options. 
+ @ In, None + @ Out, vars, dict, {varName: manual description} for each solution export option + """ + ok = super(Sampler, cls).getSolutionExportVariableNames() + new = { + 'batchID': 'identifier for the sampling batch. If not batching, same as sample identifier.' + } + ok.update(new) + return ok + + def __init__(self): """ Default Constructor that will initialize member variables with reasonable @@ -205,10 +221,15 @@ def __init__(self): """ super().__init__() ### COUNTERS AND FLAGS ### - self.batch = 0 # determines the size of each sampling batch to run - self.counter = 0 # Counter of the samples performed (better the input generated!!!). It is reset by calling the function self.initialize - self.auxcnt = 0 # Aux counter of samples performed (for its usage check initialize method) - self.limit = sys.maxsize # maximum number of Samples (for example, Monte Carlo = Number of HistorySet to run, DET = Unlimited) + self.batch = 0 # determines the size of each sampling batch to run, 0 means none + self.counters = { + 'batches': 0, # Counter of number of batches submitted. Same as "samples" if not batching. + 'samples': 0, # Counter of the samples performed (better the input generated!!!). It is reset by calling the function self.initialize + 'seeding': 0, # Used to control consecutive seeding, was "auxcnt" + } + self.limits = { + 'samples': sys.maxsize # limits the number of samples that can be taken. Other samples can add additional keywords. + } self.initSeed = None # if not provided the seed is randomly generated at the initialization of the sampler, the step can override the seed by sending in another one self.printTag = self.type # prefix for all prints (sampler type) self.reseedAtEachIteration = False # Logical flag. True if every newer evaluation is performed after a new reseeding @@ -603,7 +624,7 @@ def getInitParams(self): paramDict = {} for variable in self.toBeSampled.items(): paramDict["sampled variable: "+variable[0]] = 'is sampled using the distribution ' +variable[1] - paramDict['limit' ] = self.limit + paramDict['limit' ] = self.limits['samples'] paramDict['initial seed' ] = self.initSeed paramDict.update(self.localGetInitParams()) @@ -618,15 +639,16 @@ def initialize(self, externalSeeding=None, solutionExport=None): """ if self.initSeed is None: self.initSeed = randomUtils.randomIntegers(0,2**31,self) - self.counter = 0 + self.counters['samples'] = 0 + self.counters['batches'] = 0 if not externalSeeding: randomUtils.randomSeed(self.initSeed) # use the sampler initialization seed - self.auxcnt = self.initSeed + self.counters['seeding'] = self.initSeed elif externalSeeding=='continue': pass # in this case the random sequence needs to be preserved else: randomUtils.randomSeed(externalSeeding) # the external seeding is used - self.auxcnt = externalSeeding + self.counters['seeding'] = externalSeeding # grab restart dataobject if it's available, then in localInitialize the sampler can deal with it. 
if 'Restart' in self.assemblerDict: self.raiseADebug('Restart object: '+str(self.assemblerDict['Restart'])) @@ -693,6 +715,8 @@ def initialize(self, externalSeeding=None, solutionExport=None): meta = ['ProbabilityWeight','prefix','PointProbability'] for var in self.toBeSampled: meta += ['ProbabilityWeight-'+ key for key in var.split(",")] + if self.batch > 0: + meta.append('batchID') self.addMetaKeys(meta) def getBatchSize(self): @@ -745,7 +769,7 @@ def readSamplerInit(self,xmlNode): for childChild in child.subparts: if childChild.getName() == "limit": try: - self.limit = int(childChild.value) + self.limits['samples'] = int(childChild.value) except ValueError: self.raiseAnError(IOError, f'reading the attribute for the sampler {self.name} it was not possible to perform the conversion to integer for the attribute limit with value {childChild.value}') if childChild.getName() == "initialSeed": @@ -801,7 +825,7 @@ def getCurrentSetting(self): and each parameter's initial value as the dictionary values """ paramDict = {} - paramDict['counter'] = self.counter + paramDict['counter'] = self.counters['samples'] paramDict['initial seed'] = self.initSeed for key in self.samplerInfo: paramDict[key] = self.samplerInfo[key] @@ -839,7 +863,7 @@ def amIreadyToProvideAnInput(self): @ In, None @ Out, ready, bool, is this sampler ready to generate another sample? """ - if self.counter < self.limit: # can use < since counter is 0-based + if self.counters['samples'] < self.limits['samples']: # can use < since counter is 0-based ready = True else: ready = False @@ -855,8 +879,7 @@ def localStillReady(self, ready): @ In, ready, bool, a boolean representing whether the caller is prepared for another input. @ Out, ready, bool, a boolean representing whether the caller is prepared for another input. """ - # TODO is this an okay check for ALL samplers? - if self.counter > self.limit: + if self.counters['samples'] > self.limits['samples']: ready = False return ready @@ -993,28 +1016,31 @@ def _functionalVariables(self, rlzBatch): for corrVar in var.split(","): rlz[corrVar.strip()] = funcEval - def _incrementCounter(self): + def _incrementCounter(self, numAdded=1): """ Increments counter and sets up prefix. @ In, None @ Out, None """ #since we are creating the input for the next run we increase the counter and global counter - self.counter += 1 - self.auxcnt += 1 + self.counters['samples'] += numAdded + self.counters['batches'] += 1 + self.counters['seeding'] += numAdded # TODO could be 1, but kept for consistency # prep to exit if over the limit - if self.counter >= self.limit: - self.raiseADebug('Sampling limit reached!') + if self.counters['samples'] >= self.limits['samples']: + self.raiseADebug(f'Sampling limit reached! ({self.counters["samples"]} samples > {self.limits["sampling"]} limit)') # TODO this is disjointed from readiness check! 
# FIXME, the following condition check is make sure that the require info is only printed once # when dump metadata to xml, this should be removed in the future when we have a better way to # dump the metadata - if self.counter > 1: + if self.counters['samples'] > 1: for key in self.entitiesToRemove: self.samplerInfo.pop(key,None) if self.reseedAtEachIteration: - randomUtils.randomSeed(self.auxcnt - 1) - self.samplerInfo['prefix'] = str(self.counter) + randomUtils.randomSeed(self.counters['seeding'] - 1) + # FIXME this may be setting the BATCH prefix, not the SAMPLE prefix + # -> so let's move it out of this method + # self.samplerInfo['prefix'] = str(self.counters['batches']) #FIXME is this useful, or should we be using the counters? def _performVariableTransform(self, rlzBatch): """ @@ -1083,13 +1109,20 @@ def generateInput(self, model, modelInput): # instantiate a batch of data carrier realizations batchSize = self.getBatchSize() rlzBatch = RealizationBatch(batchSize) + rlzBatch.ID = self.counters['batches'] if batchSize == 0: - # this means the current sampler does not know how to handle batching, so do it one at a time + # this means the current sampler does not know how to handle batching, so fill the batch one-at-a-time for rlz in rlzBatch: self._incrementCounter() + rlz.inputInfo['prefix'] = str(self.counters['samples']) self.localGenerateInput(rlz, model, modelInput) else: - self._incrementCounter() # TODO FIXME for GA, need a batch counter + # since the counter incrementer adds the whole batch at once, grab the initial counter value + # so we can use it to number the samples correctly + startPrefix = self.counters['samples'] + self._incrementCounter(numAdded=batchSize) + for r, rlz in enumerate(rlzBatch): + rlz.inputInfo['prefix'] = str(startPrefix + r + 1) self.localGenerateInput(rlzBatch, model, modelInput) # this sampler knows how to handle batching, so we do it all at once # correlated variables @@ -1138,9 +1171,9 @@ def generateInputBatch(self, myInput, model, batchSize, projector=None): @ In, projector, object, optional, used for adaptive sampling to provide the projection of the solution on the success metric @ Out, newInputs, list of list, list of the list of input sets """ - FIXME # used? + FIXME # used? -> should be moved to using batch system! newInputs = [] - while self.amIreadyToProvideAnInput() and (self.counter < batchSize): + while self.amIreadyToProvideAnInput() and (self.counters['samples'] < batchSize): if projector is None: newInputs.append(self.generateInput(model, myInput)) else: @@ -1149,11 +1182,11 @@ def generateInputBatch(self, myInput, model, batchSize, projector=None): return newInputs @abc.abstractmethod - def localGenerateInput(self, rlz, model, modelInput): + def localGenerateInput(self, batch, model, modelInput): """ This class need to be overwritten since it is here that the magic of the sampler happens. After this method call the self.inputInfo should be ready to be sent to the model - @ In, rlz, Realization, mapping of variables to values + @ In, batch, RealizationBatch (or Realization if not compatible), mapping of variables to values @ In, model, model instance, Model instance @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. 
etc) @ Out, None @@ -1277,8 +1310,8 @@ def flush(self): """ self.metadataKeys = set() self.assemblerDict = {} - self.counter = 0 - self.auxcnt = 0 + for key in self.counters: + self.counters[key] = 0 self.distDict = {} self.funcDict = {} self.variableFunctionExecutionList = [] diff --git a/ravenframework/Steps/MultiRun.py b/ravenframework/Steps/MultiRun.py index 33c533a97b..96c286e17f 100644 --- a/ravenframework/Steps/MultiRun.py +++ b/ravenframework/Steps/MultiRun.py @@ -173,7 +173,6 @@ def _localTakeAstepRun(self, inDictionary): finishedJobList = finishedJobObjs self.raiseADebug(f'BATCHING: Collecting JOB batch named "{finishedJobList[0].groupId}".') else: - CANTGETHERE finishedJobList = [finishedJobObjs] currentFailures = [] for finishedJob in finishedJobList: From af84d716d582496de1b2da424fed6c81055a3ea7 Mon Sep 17 00:00:00 2001 From: talbpw Date: Wed, 18 Dec 2024 09:39:52 -0700 Subject: [PATCH 13/18] regolding basic graddesc test due to using batching --- .../GradientDescent/gold/Basic/optOut.csv | 198 +++++++++--------- .../gold/Basic/opt_export_0.csv | 12 +- 2 files changed, 98 insertions(+), 112 deletions(-) diff --git a/tests/framework/Optimizers/GradientDescent/gold/Basic/optOut.csv b/tests/framework/Optimizers/GradientDescent/gold/Basic/optOut.csv index 27707761f6..deb089a613 100644 --- a/tests/framework/Optimizers/GradientDescent/gold/Basic/optOut.csv +++ b/tests/framework/Optimizers/GradientDescent/gold/Basic/optOut.csv @@ -1,101 +1,97 @@ -x,y,func,const,ans,aux_ans,PointProbability,prefix,ProbabilityWeight-y,ProbabilityWeight,ProbabilityWeight-x --2.0,-2.0,-0.86,3.14,495.703125,-4.0,0.0123456790123,1,0.111111111111,1,0.111111111111 --1.99363603897,-2.0,-0.853636038969,3.14,492.914946598,-3.99363603897,0.0123456790123,2,0.111111111111,1,0.111111111111 --2.0,-2.00636396103,-0.866363961031,3.14,502.623118284,-4.00636396103,0.0123456790123,3,0.111111111111,1,0.111111111111 --1.76216514014,-1.4097165262,-0.0318816663469,3.14,120.216065726,-3.17188166635,0.0123456790123,4,0.111111111111,1,0.111111111111 --1.76852910117,-1.4097165262,-0.0382456273776,3.14,120.837901258,-3.17824562738,0.0123456790123,5,0.111111111111,1,0.111111111111 --1.76216514014,-1.40335256517,-0.0255177053162,3.14,118.883771245,-3.16551770532,0.0123456790123,6,0.111111111111,1,0.111111111111 --1.49300760726,-0.833041381524,0.813951011217,3.14,50.0888841374,-2.32604898878,0.0123456790123,7,0.111111111111,1,0.111111111111 --1.49937156829,-0.833041381524,0.807587050187,3.14,50.2985657213,-2.33241294981,0.0123456790123,8,0.111111111111,1,0.111111111111 --1.49300760726,-0.839405342555,0.807587050187,3.14,50.2826749673,-2.33241294981,0.0123456790123,9,0.111111111111,1,0.111111111111 --0.908994151272,-0.293287483134,1.93771836559,3.14,29.3018670741,-1.20228163441,0.0123456790123,10,0.111111111111,1,0.111111111111 --0.916946547059,-0.293287483134,1.92976596981,3.14,29.4599117277,-1.21023403019,0.0123456790123,11,0.111111111111,1,0.111111111111 --0.908994151272,-0.301239878921,1.92976596981,3.14,29.3277761275,-1.21023403019,0.0123456790123,12,0.111111111111,1,0.111111111111 -0.0558184408956,-0.135120911841,3.06069752905,3.14,13.4828248644,-0.0793024709456,0.0123456790123,13,0.111111111111,1,0.111111111111 -0.065595352485,-0.135120911841,3.07047444064,3.14,13.3587492181,-0.0695255593562,0.0123456790123,14,0.111111111111,1,0.111111111111 -0.0558184408956,-0.125344000252,3.07047444064,3.14,13.4839118934,-0.0695255593562,0.0123456790123,15,0.111111111111,1,0.111111111111 
-1.23358144803,-0.145439315804,4.22814213223,3.14,3.01990062781,1.08814213223,0.0123456790123,16,0.111111111111,1,0.111111111111 -1.24535953009,-0.145439315804,4.23992021429,3.14,2.96118171263,1.09992021429,0.0123456790123,17,0.111111111111,1,0.111111111111 -1.23358144803,-0.133661233743,4.23992021429,3.14,3.0165593131,1.09992021429,0.0123456790123,18,0.111111111111,1,0.111111111111 -2.6986739362,-0.0620703524431,5.77660358376,3.14,2.0640619099,2.63660358376,0.0123456790123,19,0.111111111111,1,0.111111111111 -2.71334856186,-0.0620703524431,5.79127820942,3.14,2.12231442969,2.65127820942,0.0123456790123,20,0.111111111111,1,0.111111111111 -2.6986739362,-0.0473957267819,5.79127820942,3.14,1.96118873401,2.65127820942,0.0123456790123,21,0.111111111111,1,0.111111111111 -1.7952574268,1.5333511983,6.4686086251,3.14,81.2197052519,3.3286086251,0.0123456790123,22,0.111111111111,1,0.111111111111 -2.6986739362,-0.0620703524431,5.77660358376,3.14,2.0640619099,2.63660358376,0.0123456790123,23,0.111111111111,1,0.111111111111 -2.6803394569,-0.0620703524431,5.75826910445,3.14,1.99316991529,2.61826910445,0.0123456790123,24,0.111111111111,1,0.111111111111 -2.6986739362,-0.0437358731375,5.79493806307,3.14,1.93587017536,2.65493806307,0.0123456790123,25,0.111111111111,1,0.111111111111 -2.1071506516,1.0075623514,6.25471300301,3.14,14.6513640883,3.11471300301,0.0123456790123,26,0.111111111111,1,0.111111111111 -2.6986739362,-0.0620703524431,5.77660358376,3.14,2.0640619099,2.63660358376,0.0123456790123,27,0.111111111111,1,0.111111111111 -2.71089692241,-0.0620703524431,5.78882656996,3.14,2.11248886225,2.64882656996,0.0123456790123,28,0.111111111111,1,0.111111111111 -2.6986739362,-0.0498473662394,5.78882656996,3.14,1.97822409982,2.64882656996,0.0123456790123,29,0.111111111111,1,0.111111111111 -2.2982778586,0.647640269243,6.08591812785,3.14,2.2193394575,2.94591812785,0.0123456790123,30,0.111111111111,1,0.111111111111 -2.6986739362,-0.0620703524431,5.77660358376,3.14,2.0640619099,2.63660358376,0.0123456790123,31,0.111111111111,1,0.111111111111 -2.69052527873,-0.0620703524431,5.76845492629,3.14,2.03229532949,2.62845492629,0.0123456790123,32,0.111111111111,1,0.111111111111 -2.6986739362,-0.0702190099122,5.76845492629,3.14,2.12213322242,2.62845492629,0.0123456790123,33,0.111111111111,1,0.111111111111 -2.43796304958,0.414525591535,5.99248864112,3.14,0.1887133043,2.85248864112,0.0123456790123,34,0.111111111111,1,0.111111111111 -2.4433954879,0.414525591535,5.99792107943,3.14,0.182589237911,2.85792107943,0.0123456790123,35,0.111111111111,1,0.111111111111 -2.43796304958,0.419958029847,5.99792107943,3.14,0.20107756413,2.85792107943,0.0123456790123,36,0.111111111111,1,0.111111111111 -2.73935159855,-0.193966559427,5.68538503912,3.14,3.3026249344,2.54538503912,0.0123456790123,37,0.111111111111,1,0.111111111111 -2.43796304958,0.414525591535,5.99248864112,3.14,0.1887133043,2.85248864112,0.0123456790123,38,0.111111111111,1,0.111111111111 -2.4447534675,0.414525591535,5.99927905904,3.14,0.181075807611,2.85927905904,0.0123456790123,39,0.111111111111,1,0.111111111111 -2.43796304958,0.407735173615,5.9856982232,3.14,0.174494637289,2.8456982232,0.0123456790123,40,0.111111111111,1,0.111111111111 -2.65217854187,0.0157223085471,5.80790085042,3.14,1.39512500367,2.66790085042,0.0123456790123,41,0.111111111111,1,0.111111111111 -2.43796304958,0.414525591535,5.99248864112,3.14,0.1887133043,2.85248864112,0.0123456790123,42,0.111111111111,1,0.111111111111 
-2.4334361043,0.414525591535,5.98796169584,3.14,0.193901863832,2.84796169584,0.0123456790123,43,0.111111111111,1,0.111111111111 -2.43796304958,0.419052536814,5.9970155864,3.14,0.198954700077,2.8570155864,0.0123456790123,44,0.111111111111,1,0.111111111111 -2.57435570864,0.145308057513,5.85966376616,3.14,0.566720647988,2.71966376616,0.0123456790123,45,0.111111111111,1,0.111111111111 -2.43796304958,0.414525591535,5.99248864112,3.14,0.1887133043,2.85248864112,0.0123456790123,46,0.111111111111,1,0.111111111111 -2.4409810131,0.414525591535,5.99550660464,3.14,0.185297329088,2.85550660464,0.0123456790123,47,0.111111111111,1,0.111111111111 -2.43796304958,0.411507628015,5.9894706776,3.14,0.182226142843,2.8494706776,0.0123456790123,48,0.111111111111,1,0.111111111111 -2.53170619372,0.23650119913,5.90820739285,3.14,0.223152036915,2.76820739285,0.0123456790123,49,0.111111111111,1,0.111111111111 -2.43796304958,0.414525591535,5.99248864112,3.14,0.1887133043,2.85248864112,0.0123456790123,50,0.111111111111,1,0.111111111111 -2.43997502526,0.414525591535,5.9945006168,3.14,0.186432159538,2.8545006168,0.0123456790123,51,0.111111111111,1,0.111111111111 -2.43796304958,0.416537567215,5.9945006168,3.14,0.193188891909,2.8545006168,0.0123456790123,52,0.111111111111,1,0.111111111111 -2.49887281286,0.295021127603,5.93389394046,3.14,0.105657997798,2.79389394046,0.0123456790123,53,0.111111111111,1,0.111111111111 -2.50021412998,0.295021127603,5.93523525758,3.14,0.105736406068,2.79523525758,0.0123456790123,54,0.111111111111,1,0.111111111111 -2.49887281286,0.293679810483,5.93255262334,3.14,0.10722014218,2.79255262334,0.0123456790123,55,0.111111111111,1,0.111111111111 -2.4904680161,0.462471654459,6.09293967056,3.14,0.256459637347,2.95293967056,0.0123456790123,56,0.111111111111,1,0.111111111111 -2.49887281286,0.295021127603,5.93389394046,3.14,0.105657997798,2.79389394046,0.0123456790123,57,0.111111111111,1,0.111111111111 -2.5005494261,0.295021127603,5.9355705537,3.14,0.105757287813,2.7955705537,0.0123456790123,58,0.111111111111,1,0.111111111111 -2.49887281286,0.296697740842,5.9355705537,3.14,0.103753590883,2.7955705537,0.0123456790123,59,0.111111111111,1,0.111111111111 -2.49305314774,0.406643736668,6.0396968844,3.14,0.118808446685,2.8996968844,0.0123456790123,60,0.111111111111,1,0.111111111111 -2.49887281286,0.295021127603,5.93389394046,3.14,0.105657997798,2.79389394046,0.0123456790123,61,0.111111111111,1,0.111111111111 -2.49999055502,0.295021127603,5.93501168262,3.14,0.105722767011,2.79501168262,0.0123456790123,62,0.111111111111,1,0.111111111111 -2.49887281286,0.296138869762,5.93501168262,3.14,0.104382421063,2.79501168262,0.0123456790123,63,0.111111111111,1,0.111111111111 -2.49509401848,0.36944139653,6.00453541501,3.14,0.079876658151,2.86453541501,0.0123456790123,64,0.111111111111,1,0.111111111111 -2.49434885704,0.36944139653,6.00379025357,3.14,0.0802936351736,2.86379025357,0.0123456790123,65,0.111111111111,1,0.111111111111 -2.49509401848,0.36869623509,6.00379025357,3.14,0.0795045881868,2.86379025357,0.0123456790123,66,0.111111111111,1,0.111111111111 -2.56459362239,0.307426672113,6.0120202945,3.14,0.0996875631353,2.8720202945,0.0123456790123,67,0.111111111111,1,0.111111111111 -2.49509401848,0.36944139653,6.00453541501,3.14,0.079876658151,2.86453541501,0.0123456790123,68,0.111111111111,1,0.111111111111 -2.49602547009,0.36944139653,6.00546686662,3.14,0.0793586305512,2.86546686662,0.0123456790123,69,0.111111111111,1,0.111111111111 
-2.49509401848,0.368509944921,6.0036039634,3.14,0.079413745294,2.8636039634,0.0123456790123,70,0.111111111111,1,0.111111111111 -2.54139709763,0.32806466165,6.00946175928,3.14,0.0734851786742,2.86946175928,0.0123456790123,71,0.111111111111,1,0.111111111111 -2.54077612989,0.32806466165,6.00884079154,3.14,0.0735005906712,2.86884079154,0.0123456790123,72,0.111111111111,1,0.111111111111 -2.54139709763,0.327443693911,6.00884079154,3.14,0.0740050813894,2.86884079154,0.0123456790123,73,0.111111111111,1,0.111111111111 -2.54369708324,0.405651541563,6.08934862481,3.14,0.0787844861168,2.94934862481,0.0123456790123,74,0.111111111111,1,0.111111111111 -2.54139709763,0.32806466165,6.00946175928,3.14,0.0734851786742,2.86946175928,0.0123456790123,75,0.111111111111,1,0.111111111111 -2.54217330726,0.32806466165,6.01023796891,3.14,0.0734682762216,2.87023796891,0.0123456790123,76,0.111111111111,1,0.111111111111 -2.54139709763,0.328840871278,6.01023796891,3.14,0.0728468946002,2.87023796891,0.0123456790123,77,0.111111111111,1,0.111111111111 -2.5427669421,0.379793835891,6.06256077799,3.14,0.0604894232541,2.92256077799,0.0123456790123,78,0.111111111111,1,0.111111111111 -2.54224946902,0.379793835891,6.06204330491,3.14,0.0607227736813,2.92204330491,0.0123456790123,79,0.111111111111,1,0.111111111111 -2.5427669421,0.379276362805,6.06204330491,3.14,0.0602954387144,2.92204330491,0.0123456790123,80,0.111111111111,1,0.111111111111 -2.5925082872,0.338443785458,6.07095207266,3.14,0.0660262750688,2.93095207266,0.0123456790123,81,0.111111111111,1,0.111111111111 -2.5427669421,0.379793835891,6.06256077799,3.14,0.0604894232541,2.92256077799,0.0123456790123,82,0.111111111111,1,0.111111111111 -2.54212010147,0.379793835891,6.06191393736,3.14,0.0607812791946,2.92191393736,0.0123456790123,83,0.111111111111,1,0.111111111111 -2.5427669421,0.380440676527,6.06320761863,3.14,0.0607417463916,2.92320761863,0.0123456790123,84,0.111111111111,1,0.111111111111 -2.57538850329,0.351590967546,6.06697947083,3.14,0.0550136051699,2.92697947083,0.0123456790123,85,0.111111111111,1,0.111111111111 -2.57581973038,0.351590967546,6.06741069792,3.14,0.0549810549548,2.92741069792,0.0123456790123,86,0.111111111111,1,0.111111111111 -2.57538850329,0.352022194637,6.06741069792,3.14,0.0547604777635,2.92741069792,0.0123456790123,87,0.111111111111,1,0.111111111111 -2.58226316361,0.405051902693,6.1273150663,3.14,0.0556156283883,2.9873150663,0.0123456790123,88,0.111111111111,1,0.111111111111 -2.57538850329,0.351590967546,6.06697947083,3.14,0.0550136051699,2.92697947083,0.0123456790123,89,0.111111111111,1,0.111111111111 -2.57484949192,0.351590967546,6.06644045947,3.14,0.0550553912795,2.92644045947,0.0123456790123,90,0.111111111111,1,0.111111111111 -2.57538850329,0.352129978914,6.0675184822,3.14,0.0546978877332,2.9275184822,0.0123456790123,91,0.111111111111,1,0.111111111111 -2.58010336695,0.387214400395,6.10731776734,3.14,0.0476931261698,2.96731776734,0.0123456790123,92,0.111111111111,1,0.111111111111 -2.58046270786,0.387214400395,6.10767710825,3.14,0.0475626136197,2.96767710825,0.0123456790123,93,0.111111111111,1,0.111111111111 -2.58010336695,0.386855059483,6.10695842643,3.14,0.0476012837972,2.96695842643,0.0123456790123,94,0.111111111111,1,0.111111111111 -2.61683719642,0.361364610093,6.11820180652,3.14,0.047273164752,2.97820180652,0.0123456790123,95,0.111111111111,1,0.111111111111 -2.61728637188,0.361364610093,6.11865098197,3.14,0.0472787402168,2.97865098197,0.0123456790123,96,0.111111111111,1,0.111111111111 
-2.61683719642,0.360915434639,6.11775263106,3.14,0.0475995148696,2.97775263106,0.0123456790123,97,0.111111111111,1,0.111111111111 -2.61620130686,0.398585297682,6.15478660454,3.14,0.0382727627184,3.01478660454,0.0123456790123,98,0.111111111111,1,0.111111111111 -2.61657356805,0.398585297682,6.15515886573,3.14,0.0381532820128,3.01515886573,0.0123456790123,99,0.111111111111,1,0.111111111111 -2.61620130686,0.398213036491,6.15441434335,3.14,0.0381790027404,3.01441434335,0.0123456790123,100,0.111111111111,1,0.111111111111 +x,y,func,const,ans,aux_ans,PointProbability,ProbabilityWeight,batchID,ProbabilityWeight-x,prefix,ProbabilityWeight-y +-2.0,-2.0,-0.86,3.14,495.703125,-4.0,0.0123456790123,1,0,0.111111111111,1,0.111111111111 +-1.99363603897,-2.0,-0.853636038969,3.14,492.914946598,-3.99363603897,0.0123456790123,1,0,0.111111111111,2,0.111111111111 +-2.0,-2.00636396103,-0.866363961031,3.14,502.623118284,-4.00636396103,0.0123456790123,1,0,0.111111111111,3,0.111111111111 +-1.76216514014,-1.4097165262,-0.0318816663469,3.14,120.216065726,-3.17188166635,0.0123456790123,1,1,0.111111111111,4,0.111111111111 +-1.76852910117,-1.4097165262,-0.0382456273776,3.14,120.837901258,-3.17824562738,0.0123456790123,1,1,0.111111111111,5,0.111111111111 +-1.76216514014,-1.40335256517,-0.0255177053162,3.14,118.883771245,-3.16551770532,0.0123456790123,1,1,0.111111111111,6,0.111111111111 +-1.49300760726,-0.833041381524,0.813951011217,3.14,50.0888841374,-2.32604898878,0.0123456790123,1,2,0.111111111111,7,0.111111111111 +-1.49937156829,-0.833041381524,0.807587050187,3.14,50.2985657213,-2.33241294981,0.0123456790123,1,2,0.111111111111,8,0.111111111111 +-1.49300760726,-0.839405342555,0.807587050187,3.14,50.2826749673,-2.33241294981,0.0123456790123,1,2,0.111111111111,9,0.111111111111 +-0.908994151272,-0.293287483134,1.93771836559,3.14,29.3018670741,-1.20228163441,0.0123456790123,1,3,0.111111111111,10,0.111111111111 +-0.916946547059,-0.293287483134,1.92976596981,3.14,29.4599117277,-1.21023403019,0.0123456790123,1,3,0.111111111111,11,0.111111111111 +-0.908994151272,-0.301239878921,1.92976596981,3.14,29.3277761275,-1.21023403019,0.0123456790123,1,3,0.111111111111,12,0.111111111111 +0.0558184408956,-0.135120911841,3.06069752905,3.14,13.4828248644,-0.0793024709456,0.0123456790123,1,4,0.111111111111,13,0.111111111111 +0.065595352485,-0.135120911841,3.07047444064,3.14,13.3587492181,-0.0695255593562,0.0123456790123,1,4,0.111111111111,14,0.111111111111 +0.0558184408956,-0.125344000252,3.07047444064,3.14,13.4839118934,-0.0695255593562,0.0123456790123,1,4,0.111111111111,15,0.111111111111 +1.23358144803,-0.145439315804,4.22814213223,3.14,3.01990062781,1.08814213223,0.0123456790123,1,5,0.111111111111,16,0.111111111111 +1.24535953009,-0.145439315804,4.23992021429,3.14,2.96118171263,1.09992021429,0.0123456790123,1,5,0.111111111111,17,0.111111111111 +1.23358144803,-0.133661233743,4.23992021429,3.14,3.0165593131,1.09992021429,0.0123456790123,1,5,0.111111111111,18,0.111111111111 +2.6986739362,-0.0620703524431,5.77660358376,3.14,2.0640619099,2.63660358376,0.0123456790123,1,6,0.111111111111,19,0.111111111111 +2.71334856186,-0.0620703524431,5.79127820942,3.14,2.12231442969,2.65127820942,0.0123456790123,1,6,0.111111111111,20,0.111111111111 +2.6986739362,-0.0473957267819,5.79127820942,3.14,1.96118873401,2.65127820942,0.0123456790123,1,6,0.111111111111,21,0.111111111111 +1.7952574268,1.5333511983,6.4686086251,3.14,81.2197052519,3.3286086251,0.0123456790123,1,7,0.111111111111,22,0.111111111111 
+1.77692294749,1.5333511983,6.4502741458,3.14,80.2454284489,3.3102741458,0.0123456790123,1,7,0.111111111111,23,0.111111111111 +1.7952574268,1.55168567761,6.48694310441,3.14,85.82889664,3.34694310441,0.0123456790123,1,7,0.111111111111,24,0.111111111111 +2.6986739362,-0.0620703524431,5.77660358376,3.14,2.0640619099,2.63660358376,0.0123456790123,1,8,0.111111111111,25,0.111111111111 +2.6803394569,-0.0620703524431,5.75826910445,3.14,1.99316991529,2.61826910445,0.0123456790123,1,8,0.111111111111,26,0.111111111111 +2.6986739362,-0.0437358731375,5.79493806307,3.14,1.93587017536,2.65493806307,0.0123456790123,1,8,0.111111111111,27,0.111111111111 +2.1071506516,1.0075623514,6.25471300301,3.14,14.6513640883,3.11471300301,0.0123456790123,1,9,0.111111111111,28,0.111111111111 +2.0949276654,1.0075623514,6.2424900168,3.14,14.6487432332,3.1024900168,0.0123456790123,1,9,0.111111111111,29,0.111111111111 +2.1071506516,0.995339365198,6.2424900168,3.14,13.9328678068,3.1024900168,0.0123456790123,1,9,0.111111111111,30,0.111111111111 +2.6986739362,-0.0620703524431,5.77660358376,3.14,2.0640619099,2.63660358376,0.0123456790123,1,10,0.111111111111,31,0.111111111111 +2.71089692241,-0.0620703524431,5.78882656996,3.14,2.11248886225,2.64882656996,0.0123456790123,1,10,0.111111111111,32,0.111111111111 +2.6986739362,-0.0498473662394,5.78882656996,3.14,1.97822409982,2.64882656996,0.0123456790123,1,10,0.111111111111,33,0.111111111111 +2.2982778586,0.647640269243,6.08591812785,3.14,2.2193394575,2.94591812785,0.0123456790123,1,11,0.111111111111,34,0.111111111111 +2.29012920113,0.647640269243,6.07776947038,3.14,2.2433217688,2.93776947038,0.0123456790123,1,11,0.111111111111,35,0.111111111111 +2.2982778586,0.639491611774,6.07776947038,3.14,2.10655303672,2.93776947038,0.0123456790123,1,11,0.111111111111,36,0.111111111111 +2.6986739362,-0.0620703524431,5.77660358376,3.14,2.0640619099,2.63660358376,0.0123456790123,1,12,0.111111111111,37,0.111111111111 +2.69052527873,-0.0620703524431,5.76845492629,3.14,2.03229532949,2.62845492629,0.0123456790123,1,12,0.111111111111,38,0.111111111111 +2.6986739362,-0.0702190099122,5.76845492629,3.14,2.12213322242,2.62845492629,0.0123456790123,1,12,0.111111111111,39,0.111111111111 +2.43796304958,0.414525591535,5.99248864112,3.14,0.1887133043,2.85248864112,0.0123456790123,1,13,0.111111111111,40,0.111111111111 +2.4433954879,0.414525591535,5.99792107943,3.14,0.182589237911,2.85792107943,0.0123456790123,1,13,0.111111111111,41,0.111111111111 +2.43796304958,0.419958029847,5.99792107943,3.14,0.20107756413,2.85792107943,0.0123456790123,1,13,0.111111111111,42,0.111111111111 +2.73935159855,-0.193966559427,5.68538503912,3.14,3.3026249344,2.54538503912,0.0123456790123,1,14,0.111111111111,43,0.111111111111 +2.74614201647,-0.193966559427,5.69217545704,3.14,3.338378623,2.55217545704,0.0123456790123,1,14,0.111111111111,44,0.111111111111 +2.73935159855,-0.200756977347,5.6785946212,3.14,3.36381567922,2.5385946212,0.0123456790123,1,14,0.111111111111,45,0.111111111111 +2.43796304958,0.414525591535,5.99248864112,3.14,0.1887133043,2.85248864112,0.0123456790123,1,15,0.111111111111,46,0.111111111111 +2.4447534675,0.414525591535,5.99927905904,3.14,0.181075807611,2.85927905904,0.0123456790123,1,15,0.111111111111,47,0.111111111111 +2.43796304958,0.407735173615,5.9856982232,3.14,0.174494637289,2.8456982232,0.0123456790123,1,15,0.111111111111,48,0.111111111111 +2.65217854187,0.0157223085471,5.80790085042,3.14,1.39512500367,2.66790085042,0.0123456790123,1,16,0.111111111111,49,0.111111111111 
+2.65670548715,0.0157223085471,5.8124277957,3.14,1.40896234846,2.6724277957,0.0123456790123,1,16,0.111111111111,50,0.111111111111 +2.65217854187,0.0111953632673,5.80337390514,3.14,1.42219458864,2.66337390514,0.0123456790123,1,16,0.111111111111,51,0.111111111111 +2.43796304958,0.414525591535,5.99248864112,3.14,0.1887133043,2.85248864112,0.0123456790123,1,17,0.111111111111,52,0.111111111111 +2.4334361043,0.414525591535,5.98796169584,3.14,0.193901863832,2.84796169584,0.0123456790123,1,17,0.111111111111,53,0.111111111111 +2.43796304958,0.419052536814,5.9970155864,3.14,0.198954700077,2.8570155864,0.0123456790123,1,17,0.111111111111,54,0.111111111111 +2.57435570864,0.145308057513,5.85966376616,3.14,0.566720647988,2.71966376616,0.0123456790123,1,18,0.111111111111,55,0.111111111111 +2.57737367216,0.145308057513,5.86268172968,3.14,0.571600745018,2.72268172968,0.0123456790123,1,18,0.111111111111,56,0.111111111111 +2.57435570864,0.148326021033,5.86268172968,3.14,0.55473197081,2.72268172968,0.0123456790123,1,18,0.111111111111,57,0.111111111111 +2.43796304958,0.414525591535,5.99248864112,3.14,0.1887133043,2.85248864112,0.0123456790123,1,19,0.111111111111,58,0.111111111111 +2.4409810131,0.414525591535,5.99550660464,3.14,0.185297329088,2.85550660464,0.0123456790123,1,19,0.111111111111,59,0.111111111111 +2.43796304958,0.411507628015,5.9894706776,3.14,0.182226142843,2.8494706776,0.0123456790123,1,19,0.111111111111,60,0.111111111111 +2.53170619372,0.23650119913,5.90820739285,3.14,0.223152036915,2.76820739285,0.0123456790123,1,20,0.111111111111,61,0.111111111111 +2.52969421804,0.23650119913,5.90619541717,3.14,0.22180298414,2.76619541717,0.0123456790123,1,20,0.111111111111,62,0.111111111111 +2.53170619372,0.23851317481,5.91021936853,3.14,0.21831447519,2.77021936853,0.0123456790123,1,20,0.111111111111,63,0.111111111111 +2.43796304958,0.414525591535,5.99248864112,3.14,0.1887133043,2.85248864112,0.0123456790123,1,21,0.111111111111,64,0.111111111111 +2.43997502526,0.414525591535,5.9945006168,3.14,0.186432159538,2.8545006168,0.0123456790123,1,21,0.111111111111,65,0.111111111111 +2.43796304958,0.416537567215,5.9945006168,3.14,0.193188891909,2.8545006168,0.0123456790123,1,21,0.111111111111,66,0.111111111111 +2.49887281286,0.295021127603,5.93389394046,3.14,0.105657997798,2.79389394046,0.0123456790123,1,22,0.111111111111,67,0.111111111111 +2.50021412998,0.295021127603,5.93523525758,3.14,0.105736406068,2.79523525758,0.0123456790123,1,22,0.111111111111,68,0.111111111111 +2.49887281286,0.293679810483,5.93255262334,3.14,0.10722014218,2.79255262334,0.0123456790123,1,22,0.111111111111,69,0.111111111111 +2.4904680161,0.462471654459,6.09293967056,3.14,0.256459637347,2.95293967056,0.0123456790123,1,23,0.111111111111,70,0.111111111111 +2.49214462934,0.462471654459,6.0946162838,3.14,0.254252670186,2.9546162838,0.0123456790123,1,23,0.111111111111,71,0.111111111111 +2.4904680161,0.464148267698,6.0946162838,3.14,0.262155686925,2.9546162838,0.0123456790123,1,23,0.111111111111,72,0.111111111111 +2.49887281286,0.295021127603,5.93389394046,3.14,0.105657997798,2.79389394046,0.0123456790123,1,24,0.111111111111,73,0.111111111111 +2.5005494261,0.295021127603,5.9355705537,3.14,0.105757287813,2.7955705537,0.0123456790123,1,24,0.111111111111,74,0.111111111111 +2.49887281286,0.296697740842,5.9355705537,3.14,0.103753590883,2.7955705537,0.0123456790123,1,24,0.111111111111,75,0.111111111111 +2.49305314774,0.406643736668,6.0396968844,3.14,0.118808446685,2.8996968844,0.0123456790123,1,25,0.111111111111,76,0.111111111111 
+2.49193540558,0.406643736668,6.03857914224,3.14,0.119778717957,2.89857914224,0.0123456790123,1,25,0.111111111111,77,0.111111111111 +2.49305314774,0.405525994509,6.03857914224,3.14,0.117115853814,2.89857914224,0.0123456790123,1,25,0.111111111111,78,0.111111111111 +2.49887281286,0.295021127603,5.93389394046,3.14,0.105657997798,2.79389394046,0.0123456790123,1,26,0.111111111111,79,0.111111111111 +2.49999055502,0.295021127603,5.93501168262,3.14,0.105722767011,2.79501168262,0.0123456790123,1,26,0.111111111111,80,0.111111111111 +2.49887281286,0.296138869762,5.93501168262,3.14,0.104382421063,2.79501168262,0.0123456790123,1,26,0.111111111111,81,0.111111111111 +2.49509401848,0.36944139653,6.00453541501,3.14,0.079876658151,2.86453541501,0.0123456790123,1,27,0.111111111111,82,0.111111111111 +2.49434885704,0.36944139653,6.00379025357,3.14,0.0802936351736,2.86379025357,0.0123456790123,1,27,0.111111111111,83,0.111111111111 +2.49509401848,0.36869623509,6.00379025357,3.14,0.0795045881868,2.86379025357,0.0123456790123,1,27,0.111111111111,84,0.111111111111 +2.56459362239,0.307426672113,6.0120202945,3.14,0.0996875631353,2.8720202945,0.0123456790123,1,28,0.111111111111,85,0.111111111111 +2.56366217078,0.307426672113,6.01108884289,3.14,0.0994554302373,2.87108884289,0.0123456790123,1,28,0.111111111111,86,0.111111111111 +2.56459362239,0.308358123722,6.01295174611,3.14,0.0983473494197,2.87295174611,0.0123456790123,1,28,0.111111111111,87,0.111111111111 +2.49509401848,0.36944139653,6.00453541501,3.14,0.079876658151,2.86453541501,0.0123456790123,1,29,0.111111111111,88,0.111111111111 +2.49602547009,0.36944139653,6.00546686662,3.14,0.0793586305512,2.86546686662,0.0123456790123,1,29,0.111111111111,89,0.111111111111 +2.49509401848,0.368509944921,6.0036039634,3.14,0.079413745294,2.8636039634,0.0123456790123,1,29,0.111111111111,90,0.111111111111 +2.54139709763,0.32806466165,6.00946175928,3.14,0.0734851786742,2.86946175928,0.0123456790123,1,30,0.111111111111,91,0.111111111111 +2.54077612989,0.32806466165,6.00884079154,3.14,0.0735005906712,2.86884079154,0.0123456790123,1,30,0.111111111111,92,0.111111111111 +2.54139709763,0.327443693911,6.00884079154,3.14,0.0740050813894,2.86884079154,0.0123456790123,1,30,0.111111111111,93,0.111111111111 +2.54369708324,0.405651541563,6.08934862481,3.14,0.0787844861168,2.94934862481,0.0123456790123,1,31,0.111111111111,94,0.111111111111 +2.54447329287,0.405651541563,6.09012483443,3.14,0.0782708219754,2.95012483443,0.0123456790123,1,31,0.111111111111,95,0.111111111111 +2.54369708324,0.406427751191,6.09012483443,3.14,0.0796360275747,2.95012483443,0.0123456790123,1,31,0.111111111111,96,0.111111111111 diff --git a/tests/framework/Optimizers/GradientDescent/gold/Basic/opt_export_0.csv b/tests/framework/Optimizers/GradientDescent/gold/Basic/opt_export_0.csv index a3c14cfdd9..5069071d32 100644 --- a/tests/framework/Optimizers/GradientDescent/gold/Basic/opt_export_0.csv +++ b/tests/framework/Optimizers/GradientDescent/gold/Basic/opt_export_0.csv @@ -31,14 +31,4 @@ x,y,func,const,ans,aux_ans,stepSize,iteration,accepted,conv_gradient 2.49509401848,0.36944139653,6.00453541501,3.14,0.079876658151,2.86453541501,0.0103494623159,29.0,rerun,0.0 2.54139709763,0.32806466165,6.00946175928,3.14,0.0734851786742,2.86946175928,0.00689964154392,30.0,accepted,0.0 2.54369708324,0.405651541563,6.08934862481,3.14,0.0787844861168,2.94934862481,0.00862455142398,31.0,rejected,0.0 -2.54139709763,0.32806466165,6.00946175928,3.14,0.0734851786742,2.86946175928,0.00862455142398,32.0,rerun,0.0 
-2.5427669421,0.379793835891,6.06256077799,3.14,0.0604894232541,2.92256077799,0.00574970094932,33.0,accepted,0.0 -2.5925082872,0.338443785458,6.07095207266,3.14,0.0660262750688,2.93095207266,0.00718711817723,34.0,rejected,0.0 -2.5427669421,0.379793835891,6.06256077799,3.14,0.0604894232541,2.92256077799,0.00718711817723,35.0,rerun,0.0 -2.57538850329,0.351590967546,6.06697947083,3.14,0.0550136051699,2.92697947083,0.00479141211815,36.0,accepted,0.0 -2.58226316361,0.405051902693,6.1273150663,3.14,0.0556156283883,2.9873150663,0.00598901519275,37.0,rejected,0.0 -2.57538850329,0.351590967546,6.06697947083,3.14,0.0550136051699,2.92697947083,0.00598901519275,38.0,rerun,0.0 -2.58010336695,0.387214400395,6.10731776734,3.14,0.0476931261698,2.96731776734,0.00399267679517,39.0,accepted,0.0 -2.61683719642,0.361364610093,6.11820180652,3.14,0.047273164752,2.97820180652,0.00499083838033,40.0,accepted,0.0 -2.61620130686,0.398585297682,6.15478660454,3.14,0.0382727627184,3.01478660454,0.00413623545083,41.0,accepted,0.0 -2.61620130686,0.398585297682,6.15478660454,3.14,0.0382727627184,3.01478660454,0.00325702132016,42.0,final,0.0 +2.54139709763,0.32806466165,6.00946175928,3.14,0.0734851786742,2.86946175928,0.00862455142398,32.0,final,0.0 From 6ee36de41797aecaa14c5ac74f2409f1cc901b35 Mon Sep 17 00:00:00 2001 From: talbpw Date: Wed, 18 Dec 2024 09:40:55 -0700 Subject: [PATCH 14/18] assuring batchID gets into the expected meta --- ravenframework/BaseClasses/BaseEntity.py | 4 +++- ravenframework/Optimizers/GradientDescent.py | 4 +++- ravenframework/Optimizers/RavenSampled.py | 4 ---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ravenframework/BaseClasses/BaseEntity.py b/ravenframework/BaseClasses/BaseEntity.py index 877d8ad820..b8abd800c2 100644 --- a/ravenframework/BaseClasses/BaseEntity.py +++ b/ravenframework/BaseClasses/BaseEntity.py @@ -209,7 +209,7 @@ def provideExpectedMetaKeys(self): """ return self.metadataKeys, self.metadataParams - def addMetaKeys(self,args, params={}): + def addMetaKeys(self,args, params=None): """ Adds keywords to a list of expected metadata keys. 
@ In, args, list(str), keywords to register @@ -217,6 +217,8 @@ def addMetaKeys(self,args, params={}): values of the dictionary are lists of the corresponding indexes/coordinates of given variable @ Out, None """ + if params is None: + params = {} if any(not mathUtils.isAString(a) for a in args): self.raiseAnError('Arguments to addMetaKeys were not all strings:',args) self.metadataKeys = self.metadataKeys.union(set(args)) diff --git a/ravenframework/Optimizers/GradientDescent.py b/ravenframework/Optimizers/GradientDescent.py index 1a3161d23b..d74b0d2cee 100644 --- a/ravenframework/Optimizers/GradientDescent.py +++ b/ravenframework/Optimizers/GradientDescent.py @@ -279,12 +279,14 @@ def initialize(self, externalSeeding=None, solutionExport=None): @ In, solutionExport, DataObject, optional, a PointSet to hold the solution @ Out, None """ - RavenSampled.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport) self._gradientInstance.initialize(self.toBeSampled) self._stepInstance.initialize(self.toBeSampled, persistence=self._requiredPersistence) self._acceptInstance.initialize() # set the batch size self.batch = 1 + self._gradientInstance.numGradPoints() + # we need to set the batch size before calling base class initialize + RavenSampled.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport) + # if single trajectory, turn off follower termination if len(self._initialValues) < 2: self.raiseADebug('Setting terminateFollowers to False since only 1 trajectory exists.') diff --git a/ravenframework/Optimizers/RavenSampled.py b/ravenframework/Optimizers/RavenSampled.py index e737597f90..bac41f8be6 100644 --- a/ravenframework/Optimizers/RavenSampled.py +++ b/ravenframework/Optimizers/RavenSampled.py @@ -160,9 +160,6 @@ def initialize(self, externalSeeding=None, solutionExport=None): @ Out, None """ Optimizer.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport) - # TODO remove - # self.batch = 1 - # self.batchId = 0 ############### # Run Methods # @@ -231,7 +228,6 @@ def localGenerateInput(self, batch, model, modelInput): # TODO shouldn't this check be in the Ready check? if self.counters['samples'] + self.batch > self.limits['samples'] + 1: raise utils.NoMoreSamplesNeeded - return None # TODO raise an error? something? 
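# --- Editor's sketch (not part of the patch): a minimal, standalone illustration of the
# --- ordering adopted above, where the subclass fixes self.batch *before* the base-class
# --- initialize runs, so the base class sizes its per-batch bookkeeping correctly.
# --- Class and attribute names here are hypothetical stand-ins, not RAVEN API.
class _SketchSampledBase:
  def initialize(self):
    # the base class builds batch-sized containers from self.batch,
    # so self.batch must already hold its final value here
    self.pendingRuns = [None] * self.batch

class _SketchGradientOptimizer(_SketchSampledBase):
  def __init__(self, numGradPoints):
    self.batch = 1
    self._numGradPoints = numGradPoints

  def initialize(self):
    # one opt point plus one evaluation per gradient direction (assumed layout)
    self.batch = 1 + self._numGradPoints
    super().initialize()

opt = _SketchGradientOptimizer(numGradPoints=2)
opt.initialize()
assert len(opt.pendingRuns) == 3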
for rlz in batch: inputInfo = rlz.inputInfo # get point from stack From 64504552421c75e0a6fac1c52dff650f5625abbe Mon Sep 17 00:00:00 2001 From: talbpw Date: Wed, 18 Dec 2024 12:21:41 -0700 Subject: [PATCH 15/18] cleaned up a lot of stuff --- ravenframework/DataObjects/DataSet.py | 8 --- ravenframework/Models/Code.py | 52 ----------------- ravenframework/Models/Dummy.py | 5 -- ravenframework/Models/EnsembleModel.py | 30 ++-------- .../Models/HybridModels/HybridModel.py | 5 +- .../Models/HybridModels/HybridModelBase.py | 2 - ravenframework/Models/Model.py | 21 ------- ravenframework/Optimizers/GeneticAlgorithm.py | 7 +-- ravenframework/Optimizers/GradientDescent.py | 3 - ravenframework/Optimizers/Optimizer.py | 9 --- ravenframework/Optimizers/RavenSampled.py | 24 ++------ ravenframework/Samplers/CustomSampler.py | 6 -- ravenframework/Samplers/EnsembleForward.py | 9 --- ravenframework/Samplers/Grid.py | 4 -- ravenframework/Samplers/Sampler.py | 58 +++---------------- ravenframework/Steps/MultiRun.py | 1 - ravenframework/Steps/SingleRun.py | 1 - 17 files changed, 23 insertions(+), 222 deletions(-) diff --git a/ravenframework/DataObjects/DataSet.py b/ravenframework/DataObjects/DataSet.py index 8d349a42a6..60da1ca704 100644 --- a/ravenframework/DataObjects/DataSet.py +++ b/ravenframework/DataObjects/DataSet.py @@ -638,16 +638,8 @@ def realization(self, index=None, matchDict=None, noMatchDict=None, tol=1e-15, u rlzs = rlz if type(rlz).__name__ == "list" else [rlz] rlzs = [self._addIndexMapToRlz(rl) for rl in rlzs] dims = self.getDimensions() - # print('*'*80) - # print('DEBUGG whoami:', self.name) - # print('DEBUGG dims:', dims) for index, rl in enumerate(rlzs): d = {k:{'dims':tuple(dims[k]) ,'data': v} for (k,v) in rl.items() if k not in ['_indexMap']} - # print('*'*80) - # print('DEBUGG d:') - # for k, v in d.items(): - # print(k, v) - # print('*'*80) rlz[index] = xr.Dataset.from_dict(d) if len(rlzs) > 1: # concatenate just in case there are multiple realizations diff --git a/ravenframework/Models/Code.py b/ravenframework/Models/Code.py index 3b7957f815..af5dab93d3 100644 --- a/ravenframework/Models/Code.py +++ b/ravenframework/Models/Code.py @@ -364,16 +364,7 @@ def createNewInput(self, myInput, samplerType, rlz): batchID = rlz.inputInfo['batchID'] rlzID = rlz.inputInfo['prefix'] dirName = f'b{batchID}_r{rlzID}' - # FIXME we're always in batch! subDir = os.path.join(self.workingDir, dirName) - # OLD # - # brun = kwargs.get('batchRun') - # if brun is not None: - # # if batch, the subDir are a combination of prefix (batch id) and batch run id - # bid = kwargs['prefix'] if 'prefix' in kwargs.keys() else '1' - # subDirectory = os.path.join(self.workingDir,'b{}_r{}'.format(bid,brun)) - # else: - # subDirectory = os.path.join(self.workingDir, kwargs['prefix'] if 'prefix' in kwargs.keys() else '1') if not os.path.exists(subDir): os.mkdir(subDir) @@ -395,17 +386,11 @@ def createNewInput(self, myInput, samplerType, rlz): rlz.inputInfo['alias'] = self.alias self._replaceVariablesNamesWithAliasSystem(rlz, 'input', False) - # OLD # - # if 'SampledVars' in kwargs.keys(): - # sampledVars = self._replaceVariablesNamesWithAliasSystem(kwargs['SampledVars'],'input',False) # FIXME do we force all Codes to update to this new format, or do we grandfather in somehow? 
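# --- Editor's sketch (not part of the patch): the per-run working-directory naming used
# --- in the createNewInput change above, where every (batch, realization) pair gets its
# --- own 'b<batchID>_r<realizationID>' subdirectory. The helper name and IDs below are
# --- illustrative only.
import os
import tempfile

def makeRunDirectory(workingDir, batchID, rlzID):
  """Create (if missing) and return the subdirectory for one realization of one batch."""
  subDir = os.path.join(workingDir, f'b{batchID}_r{rlzID}')
  if not os.path.exists(subDir):
    os.mkdir(subDir)
  return subDir

with tempfile.TemporaryDirectory() as wd:
  print(makeRunDirectory(wd, batchID=2, rlzID=5))  # e.g. <wd>/b2_r5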
# OLD newInput = self.code.createNewInput(newInputSet, self.oriInputFiles, samplerType, **copy.deepcopy(kwargs)) newInput = self.code.createNewInput(newInputSet, self.oriInputFiles, samplerType, rlz) - # if 'SampledVars' in kwargs.keys() and len(self.alias['input'].keys()) != 0: - # kwargs['SampledVars'] = sampledVars - return (newInput, rlz) def _expandCommand(self, origCommand): @@ -498,7 +483,6 @@ def evaluateSample(self, myInput, samplerType, rlz): the second item will be the output of this model given the specified inputs """ - print('DEBUGG rlz type:', type(rlz)) inputFiles = self.createNewInput(myInput, samplerType, rlz) if isinstance(inputFiles, tuple): # FIXME why is this a class variable? Should it be only within this method scope instead? @@ -507,20 +491,12 @@ def evaluateSample(self, myInput, samplerType, rlz): else: self.currentInputFiles = inputFiles metaData = None - # OLD # - # self.currentInputFiles, metaData = (copy.deepcopy(inputFiles[0]),inputFiles[1]) if type(inputFiles).__name__ == 'tuple' else (inputFiles, None) returnedCommand = self.code.genCommand(self.currentInputFiles, self.executable, flags=self.clargs, fileArgs=self.fargs, preExec=self.preExec) - ## Given that createNewInput can only return a tuple, I don't think these - ## checks are necessary (keeping commented out until someone else can verify): - # if type(returnedCommand).__name__ != 'tuple': - # self.raiseAnError(IOError, "the generateCommand method in code interface must return a tuple") - # if type(returnedCommand[0]).__name__ != 'list': - # self.raiseAnError(IOError, "the first entry in tuple returned by generateCommand method needs to be a list of tuples!") executeCommand, self.outFileRoot = returnedCommand info = rlz.inputInfo @@ -582,7 +558,6 @@ def evaluateSample(self, myInput, samplerType, rlz): returnCode = process.returncode self.raiseADebug(" Process "+str(process.pid)+" finished "+time.ctime()+ " with returncode "+str(process.returncode)) - # procOutput = process.communicate()[0] ## If the returnCode is already non-zero, we should maintain our current ## value as it may have some meaning that can be parsed at some point, so @@ -592,8 +567,6 @@ def evaluateSample(self, myInput, samplerType, rlz): codeFailed = self.code.checkForOutputFailure(codeLogFile, metaData['subDirectory']) if codeFailed: returnCode = -1 - # close the log file - # OLD outFileObject.close() ## END "with open outFileObject" context ## We should try and use the output the code interface gives us first, but @@ -634,9 +607,6 @@ def evaluateSample(self, myInput, samplerType, rlz): loadUtility = self.code.getCsvLoadUtil() csvData = csvLoader.loadCsvFile(outFile.getAbsFile(), nullOK=False, utility=loadUtility) returnDict = csvLoader.toRealization(csvData) - #else: - # # FIXME returnDict is not defined if we get here! 
- # self.raiseAnError(RuntimeError, 'This should not be reached.') if not ravenCase: # check if the csv needs to be printed @@ -964,24 +934,9 @@ def submit(self, batch, myInput, samplerType, jobHandler): @ In, jobHandler, JobHandler instance, the global job handler instance @ Out, None """ - - # OLD - # nRuns = len(batch) - # batchMode = kwargs.get("batchMode", False) - # if batchMode: - # nRuns = kwargs["batchInfo"]['nRuns'] - for r, rlz in enumerate(batch): #shortcut for convenience info = rlz.inputInfo - # UNUSED prefix = info['prefix'] - # FIXME find out who uses this and update where that info gets stored - # -> looks like SupervisedLearning/FeatureSelection/RFE might be the only one - # -> but it also looks like Samplers/StochasticCollocation should be using it? - # -> Should uniqueHandler actually be "requester" and formalized? - # -> Should uniqueHandler be part of the Model at all, or just jobHandler? - # UNUSED uniqueHandler = rlz.inputInfo.get('uniqueHandler', 'any') - ## These two are part of the current metadata, so they will be added before ## the job is started, so that they will be captured in the metadata and match ## the current behavior of the system. If these are not desired, then this @@ -1018,13 +973,6 @@ def submit(self, batch, myInput, samplerType, jobHandler): info['numberNodes' ] = len(nodesList) self.raiseAMessage(f'batch "{batch.ID}" job {r} "{info["prefix"]}" submitted!') - # OLD # - # jobHandler.addJob((self, myInput, samplerType, rlz), - # self.__class__.evaluateSample, - # prefix, - # metadata=metadata, - # uniqueHandler=uniqueHandler, - # groupInfo=groupInfo) # submit batch of jobs together ## This may look a little weird, but due to how the parallel python library ## works, we are unable to pass a member function as a job because the diff --git a/ravenframework/Models/Dummy.py b/ravenframework/Models/Dummy.py index cb2156a63c..bcb5317aa3 100644 --- a/ravenframework/Models/Dummy.py +++ b/ravenframework/Models/Dummy.py @@ -153,11 +153,6 @@ def createNewInput(self, myInput, samplerType, rlz): self._replaceVariablesNamesWithAliasSystem(rlz, 'input', False) for var, val in rlz.items(): inputDict[var] = np.atleast_1d(val) - ### OLD ### - # if 'SampledVars' in kwargs.keys(): - # sampledVars = self._replaceVariablesNamesWithAliasSystem(kwargs['SampledVars'],'input',False) - # for key in kwargs['SampledVars'].keys(): - # inputDict[key] = np.atleast_1d(kwargs['SampledVars'][key]) missing = list(var for var,val in inputDict.items() if val is None) if len(missing) != 0: diff --git a/ravenframework/Models/EnsembleModel.py b/ravenframework/Models/EnsembleModel.py index 97eda13244..d45cd50593 100644 --- a/ravenframework/Models/EnsembleModel.py +++ b/ravenframework/Models/EnsembleModel.py @@ -15,25 +15,21 @@ EnsembleModel module, containing the class and methods to create a comunication 'pipeline' among different models in terms of Input/Output relation """ -#External Modules---------------------------------------------------------------------------------- import io import sys import copy import time import itertools from collections import OrderedDict + import numpy as np -#External Modules End-------------------------------------------------------------------------------- -#Internal Modules------------------------------------------------------------------------------------ from ..Decorators.Parallelization import Parallel from .Dummy import Dummy from ..utils import utils, InputData from ..utils.graphStructure import evaluateModelsOrder from ..Runners import Error as 
rerror from ..Runners.SharedMemoryRunner import InterruptibleThread -from ..Realizations import Realization -#Internal Modules End-------------------------------------------------------------------------------- class EnsembleModel(Dummy): """ @@ -538,13 +534,7 @@ def submit(self, batch, myInput, samplerType, jobHandler): self.localPollingThread.daemon = True self.localPollingThread.start() - for r, rlz in enumerate(batch): - # OLD # - # if batchMode: - # kw = kwargs['batchInfo']['batchRealizations'][index] - # kw['batchRun'] = index + 1 - # else: - # kw = kwargs + for rlz in batch: info = rlz.inputInfo prefix = info.get("prefix") info['jobHandler'] = jh # NOTE gets overwritten below for parallel strat 2 @@ -559,8 +549,6 @@ def submit(self, batch, myInput, samplerType, jobHandler): info['jobHandler'] = self.localJobHandler # make sure that the batchMode is set to False in the inner runs since only the # ensemble model evaluation should be batched (THIS IS REQUIRED because the CODE does not submit runs like the other models) - # TODO FIXME how does this work now with batches? - # kw['batchMode'] = False jobHandler.addClientJob( (self, myInput, samplerType, rlz), self.__class__.evaluateSample, @@ -595,7 +583,7 @@ def __retrieveDependentOutput(self,modelIn,listOfOutputs, typeOutputs): dependentOutputs['_indexMap'][inKey] = indices return dependentOutputs - def _externalRun(self, inRun, jobHandler=None):#, jobHandler): + def _externalRun(self, inRun, jobHandler=None): """ Method that performs the actual run of the ensemble model (separated from run method for parallelization purposes) @ In, inRun, tuple, tuple of Inputs, e.g. @@ -610,9 +598,7 @@ def _externalRun(self, inRun, jobHandler=None):#, jobHandler): """ originalInput = inRun[0] samplerType = inRun[1] - subRlzs = inRun[2] # OLD inputRlz = inRun[2] - # OLD inputInfo = inputRlz.inputInfo - # OLD identifier = inputInfo.pop('prefix') + subRlzs = inRun[2] identifier = subRlzs['__setIdentifier'] tempOutputs = {} inRunTargetEvaluations = {} @@ -676,28 +662,20 @@ def _externalRun(self, inRun, jobHandler=None):#, jobHandler): self.raiseAnError(IOError,"No initial conditions provided for variable "+ initialConditionToSet) # set new identifiers suffix = '' - # TODO how do I need to modify this for new batch run? - # if 'batchRun' in inputKwargs[modelIn]: - # suffix = f"{utils.returnIdSeparator()}{inputKwargs[modelIn]['batchRun']}" # FIXME this was already set in the createNewInput method! # -> the Suffix is added. Should this be something the Batch takes care of? inputInfo['prefix'] += f"{suffix}" - # OLD inputInfo['prefix'] = f"{modelIn}{utils.returnIdSeparator()}{identifier}{suffix}" inputInfo['uniqueHandler'] = f"{self.name}{identifier}{suffix}" if metadataToTransfer is not None: inputInfo['metadataToTransfer'] = metadataToTransfer for var in dependentOutput: - #inputInfo[modelIn]["SampledVars" ][key] = dependentOutput[key] ## FIXME it is a mistake (Andrea). The SampledVarsPb for this variable should be transferred from outside ## Who has this information? -- DPM 4/11/17 inputInfo["SampledVarsPb"][var] = 1. 
self._replaceVariablesNamesWithAliasSystem(inputRlz, 'input', False) self._replaceVariablesNamesWithAliasSystem(inputInfo["SampledVarsPb"], 'input', False) ## FIXME: this will come after we rework the "runInfo" collection in the code - ## if run info is present, we need to pass to to kwargs - ##if self.runInfoDict and 'Code' == self.modelsDictionary[modelIn]['Instance'].type: - ## inputKwargs[modelIn].update(self.runInfoDict) retDict, gotOuts, evaluation = self.__advanceModel(identifier, self.modelsDictionary[modelIn], diff --git a/ravenframework/Models/HybridModels/HybridModel.py b/ravenframework/Models/HybridModels/HybridModel.py index f7f333aaaa..60857433b5 100644 --- a/ravenframework/Models/HybridModels/HybridModel.py +++ b/ravenframework/Models/HybridModels/HybridModel.py @@ -247,7 +247,6 @@ def createNewInput(self, myInput, samplerType, rlz): if useROM: identifier = info['prefix'] subRlzs = {} - # OLD newKwargs = {'prefix':identifier, 'useROM':useROM} for romName in self.romsDictionary.keys(): featsList = self.romsDictionary[romName]['Instance'].getInitParams()['Features'] subRlz = rlz.createSubsetRlz(featsList) @@ -502,9 +501,9 @@ def _externalRun(self, inRun, jobHandler): @ Out, exportDict, dict, dict of results from this hybrid model """ self.raiseADebug("External Run") - subRlzs = inRun[2] # OLD was inputKwargs, comes from createNewInput + subRlzs = inRun[2] oneRlz = next(iter(subRlzs)) - identifier = oneRlz.inputInfo['prefix'] # FIXME should be batch ID, not sample ID + identifier = oneRlz.inputInfo['prefix'] # FIXME should be batch ID, not sample ID? # TODO attach this to the batch, instead of the single realizations? useROM = oneRlz.inputInfo['useROM'] # TODO need pop? inputKwargs.pop('useROM') uniqueHandler = self.name + identifier diff --git a/ravenframework/Models/HybridModels/HybridModelBase.py b/ravenframework/Models/HybridModels/HybridModelBase.py index a68580da6c..f226611a62 100644 --- a/ravenframework/Models/HybridModels/HybridModelBase.py +++ b/ravenframework/Models/HybridModels/HybridModelBase.py @@ -189,8 +189,6 @@ def evaluateSample(self, myInput, samplerType, rlz): self.raiseADebug("Evaluate Sample") excludeKeys = ['jobHandler'] kwargsKeys = list(x for x in rlz.inputInfo.keys() if x not in excludeKeys) - # FIXME what all needs to go in this? rlz.asDict? 
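# --- Editor's sketch (not part of the patch): the 'one subset realization per ROM' idea
# --- from the HybridModel.createNewInput change above, with plain dicts standing in for
# --- the Realization class; createSubsetRlz below mimics, not reproduces, the real method.
def createSubsetRlz(rlz, keepVars):
  """Return a copy of the realization restricted to the requested feature variables."""
  return {var: rlz[var] for var in keepVars if var in rlz}

rlz = {'x': 1.0, 'y': 2.0, 'z': 3.0}
romFeatures = {'romA': ['x', 'y'], 'romB': ['y', 'z']}  # hypothetical ROM -> feature lists
subRlzs = {name: createSubsetRlz(rlz, feats) for name, feats in romFeatures.items()}
print(subRlzs)  # {'romA': {'x': 1.0, 'y': 2.0}, 'romB': {'y': 2.0, 'z': 3.0}}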
- # OLD kwargsToKeep = {keepKey: kwargs[keepKey] for keepKey in kwargsKeys} jobHandler = rlz.inputInfo['jobHandler'] newInput = self.createNewInput(myInput, samplerType, rlz) ## Unpack the specifics for this class, namely just the jobHandler diff --git a/ravenframework/Models/Model.py b/ravenframework/Models/Model.py index 20213e7721..df47adc9b0 100644 --- a/ravenframework/Models/Model.py +++ b/ravenframework/Models/Model.py @@ -431,27 +431,6 @@ def submit(self, batch, myInput, samplerType, jobHandler): @ Out, None """ jobHandler.addJobBatch(batch, self, myInput, samplerType, self.__class__.evaluateSample) - ### OLD ### - # for rlz in batch: - # if rlz.isRestart: - # jobHandler.addFinishedJob(rlz, metadata=rlz.inputInfo) - # else: - # prefix = rlz.get('prefix') - # uniqueHandler = rlz.inputInfo.get('uniqueHandler', 'any') - # forceThreads = rlz.inputInfo.get('forceThreads', False) - # groupInfo = { - # 'id': rlz.inputInfo['batchId'], - # 'size': len(batch), - # } - # jobHandler.addJob( - # (self, myInput, samplerType, rlz.inputInfo), - # self.__class__.evaluateSample, - # prefix, - # metadata = rlz.inputInfo, - # uniqueHandler=uniqueHandler, - # forceUseThreads=forceThreads, - # groupInfo=groupInfo - # ) def addOutputFromExportDictionary(self,exportDict,output,options,jobIdentifier): """ diff --git a/ravenframework/Optimizers/GeneticAlgorithm.py b/ravenframework/Optimizers/GeneticAlgorithm.py index 555c56cc7f..32bb21dbaf 100644 --- a/ravenframework/Optimizers/GeneticAlgorithm.py +++ b/ravenframework/Optimizers/GeneticAlgorithm.py @@ -393,12 +393,9 @@ def initialize(self, externalSeeding=None, solutionExport=None): @ In, solutionExport, DataObject, optional, a PointSet to hold the solution @ Out, None """ - RavenSampled.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport) - - # TODO remove, moved to Sampler.initialize - # meta = ['batchId'] - # self.addMetaKeys(meta) self.batch = self._populationSize + # initialize must be called afer setting the batch size + RavenSampled.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport) if self._populationSize != len(self._initialValues): self.raiseAnError(IOError, f'Number of initial values provided for each variable is {len(self._initialValues)}, while the population size is {self._populationSize}') for _, init in enumerate(self._initialValues): diff --git a/ravenframework/Optimizers/GradientDescent.py b/ravenframework/Optimizers/GradientDescent.py index d74b0d2cee..eb217973ef 100644 --- a/ravenframework/Optimizers/GradientDescent.py +++ b/ravenframework/Optimizers/GradientDescent.py @@ -734,16 +734,13 @@ def _rejectOptPoint(self, traj, info, old): @ Out, none """ # cancel grad runs - # FIXME temp disable, can we actually cancel these still in batching? self._cancelAssociatedJobs(info['traj'], step=info['step']) # what do do if a point is rejected? # for now, rerun the opt point and gradients, AND cut step # TODO user option to EITHER rerun opt point OR cut step! # initialize a new step self._initializeStep(traj) - # update prefix, batch IDs? or is that something that should happen ... somewhere else? - # FIXME do we also need to register these differently? 
# track that the next recommended step size for this traj should be "cut" self._stepRecommendations[traj] = 'shrink' # get new grads around new point diff --git a/ravenframework/Optimizers/Optimizer.py b/ravenframework/Optimizers/Optimizer.py index e521ca0ce8..4202f6ea75 100644 --- a/ravenframework/Optimizers/Optimizer.py +++ b/ravenframework/Optimizers/Optimizer.py @@ -290,7 +290,6 @@ def initialize(self, externalSeeding=None, solutionExport=None): for entry in self.assemblerDict.get('ImplicitConstraint', []): self._impConstraintFunctions.append(entry[3]) - # FIXME can this superclass call be at the top of this method? AdaptiveSampler.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport) # sampler @@ -437,14 +436,6 @@ def _initializeInitSampler(self, externalSeeding): # TODO this doesn't technically guarantee that each var in toBeSampled has a value. # Can we check against this, or will it error in an intelligent way? - #### OLD #### - # # NOTE by looping over self.toBeSampled, we could potentially not error out when extra vars are sampled - # for var in self.toBeSampled: - # # TODO is var ever not in rlz? Should this be an error out? - # if var in rlz: - # self._initialValues[n][var] = rlz[var] # TODO float or np.1darray? - #### END OLD #### - def initializeTrajectory(self, traj=None): """ Sets up a new trajectory. diff --git a/ravenframework/Optimizers/RavenSampled.py b/ravenframework/Optimizers/RavenSampled.py index bac41f8be6..1d28aa0148 100644 --- a/ravenframework/Optimizers/RavenSampled.py +++ b/ravenframework/Optimizers/RavenSampled.py @@ -273,28 +273,16 @@ def localFinalizeActualSampling(self, jobObject, model, myInput): if self._targetEvaluation.isEmpty: self.raiseAnError(RuntimeError, f'Expected to find batch "{batchID}" in TargetEvaluation "{self._targetEvaluation.name}", but it is empty!') # get information and realization, and update trajectories - # OLD info = self.getIdentifierFromPrefix(prefix, pop=True) - if self.batch == 0: # FIXME should never be true - DEPRECATE - # _, rlz = self._targetEvaluation.realization(matchDict={'prefix': prefix}, asDataSet=False) - # if rlz is None: - # self.raiseAnError(RuntimeError, f'Expected to find entry with prefix "{prefix}" in TargetEvaluation! Found: {self._targetEvaluation.getVarValues("prefix")}') - else: - # NOTE if here, then rlz is actually a xr.Dataset, NOT a dictionary!! - _, data = self._targetEvaluation.realization(matchDict={'batchID': batchID}, asDataSet=False, first=False) - if data is None: - self.raiseAnError(RuntimeError, f'Expected to find batch with ID "{batchID}" in TargetEvaluation! Found: {self._targetEvaluation.getVarValues("batchID")}') - # NOTE in this case "rlz" is ACTUALLY a xr.Dataset with multiple realizations in it! - # _, full = self._targetEvaluation.realization(matchDict={'prefix': prefix}, asDataSet=False) - # _, full = self._targetEvaluation.realization(matchDict={'prefix': prefix}) - # if full is None: - # self.raiseAnError(RuntimeError, f'Expected to find entry with prefix "{prefix}" in TargetEvaluation! Found: {self._targetEvaluation.getVarValues("prefix")}') + # TODO FIXME receive as a dataset instead of dicts? Might be faster. Might be a lot faster. + _, data = self._targetEvaluation.realization(matchDict={'batchID': batchID}, asDataSet=False, first=False) + if data is None: + self.raiseAnError(RuntimeError, f'Expected to find batch with ID "{batchID}" in TargetEvaluation! 
Found: {self._targetEvaluation.getVarValues("batchID")}') + # NOTE in this case "rlz" is ACTUALLY a xr.Dataset with multiple realizations in it! # trim down opt point to the useful parts # TODO making a new dict might be costly, maybe worth just passing whole point? # # testing suggests no big deal on smaller problem - # TODO FIXME receive as a dataset instead of dicts? Might be faster. Might be a lot faster. self.raiseADebug('Processing new batch results ...') - for r, rlz in enumerate(data): + for rlz in data: prefix = rlz['prefix'] if not self.stillLookingForPrefix(prefix): # should we be skipping all of them if any aren't being looked for? diff --git a/ravenframework/Samplers/CustomSampler.py b/ravenframework/Samplers/CustomSampler.py index eb607a38b3..6060285ca9 100644 --- a/ravenframework/Samplers/CustomSampler.py +++ b/ravenframework/Samplers/CustomSampler.py @@ -291,16 +291,10 @@ def localGenerateInput(self, rlzBatch, model, myInput): rlz.inputInfo['ProbabilityWeight'] = self.infoFromCustom['ProbabilityWeight'][index] rlz.indexMap = self.sourceIndexMap rlz.inputInfo['SamplerType'] = 'Custom' - # if rlz.inputInfo['batchMode']: - # rlz.inputInfo['SampledVars'] = rlz - # rlz.inputInfo['batchId'] = self.name + str(self.batchId) - # batchData.append(copy.deepcopy(rlz.inputInfo)) if batchMode: self._incrementCounter() if batchMode: rlzBatch.ID = self.name + str(self.batchId) - FIXME # where does this data go? Fix along with GA - #rlzBatch.inputInfo['batchInfo'] = {'nRuns': self.batch, 'batchRealizations': batchData, 'batchId': self.name + str(self.batchId)} def flush(self): """ diff --git a/ravenframework/Samplers/EnsembleForward.py b/ravenframework/Samplers/EnsembleForward.py index 5f52755123..67ab503fa3 100644 --- a/ravenframework/Samplers/EnsembleForward.py +++ b/ravenframework/Samplers/EnsembleForward.py @@ -230,19 +230,11 @@ def localGenerateInput(self, rlz, model, modelInput): @ Out, None """ index = self.gridEnsemble.returnPointAndAdvanceIterator(returnDict=True) - print('') - print('') - print('') - print('DEBUGG rlz:', [rlz.keys()]) - print('DEBUGG index:', index) coordinate = [] for samplingStrategy in self.instantiatedSamplers: coordinate.append(self.samplersCombinations[samplingStrategy][int(index[samplingStrategy])]) for combination in coordinate: - print('') - print('DEBUGG ... combo:', combination) for key, value in combination.items(): - print('DEBUGG ... ... key:', key, type(value)) # FIXME we don't know what's inputInfo and what's sampled vars! if key in self.toBeSampled: rlz[key] = value @@ -250,7 +242,6 @@ def localGenerateInput(self, rlz, model, modelInput): rlz.inputInfo[key] = value else: if isinstance(rlz.inputInfo[key], dict) and len(value): - print('DEBUG ... ... val:', len(value), value) rlz.inputInfo[key].update(value) else: raise RuntimeError # can we get here? 
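# --- Editor's sketch (not part of the patch): the batch-retrieval pattern from the
# --- RavenSampled.localFinalizeActualSampling change earlier in this commit: fetch every
# --- realization tagged with a batch ID, then filter by per-sample prefix. Plain dicts
# --- and lists stand in for the TargetEvaluation DataObject.
def realizationsForBatch(targetEvaluation, batchID):
  """Return all stored realizations whose 'batchID' matches (possibly empty)."""
  return [rlz for rlz in targetEvaluation if rlz.get('batchID') == batchID]

def processBatch(targetEvaluation, batchID, trackedPrefixes):
  data = realizationsForBatch(targetEvaluation, batchID)
  if not data:
    raise RuntimeError(f'Expected to find batch with ID "{batchID}" in the target evaluation!')
  for rlz in data:
    if rlz['prefix'] not in trackedPrefixes:
      continue  # this sample is no longer being tracked by any trajectory
    yield rlz

store = [{'batchID': 'opt_1', 'prefix': 2, 'ans': 0.5},
         {'batchID': 'opt_1', 'prefix': 3, 'ans': 0.7}]
print(list(processBatch(store, 'opt_1', trackedPrefixes={2})))  # keeps only prefix 2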
diff --git a/ravenframework/Samplers/Grid.py b/ravenframework/Samplers/Grid.py index aa3d90caf2..f4df50f5e7 100644 --- a/ravenframework/Samplers/Grid.py +++ b/ravenframework/Samplers/Grid.py @@ -131,10 +131,6 @@ def localGetCurrentSetting(self): and each parameter's initial value as the dictionary values """ paramDict = {} - # for var, value in self.values.items(): - # paramDict[f'coordinate {var} has value'] = value - # FIXME we don't have rlz yet - return paramDict def localInitialize(self): diff --git a/ravenframework/Samplers/Sampler.py b/ravenframework/Samplers/Sampler.py index b26f10b3cc..33425d8a55 100644 --- a/ravenframework/Samplers/Sampler.py +++ b/ravenframework/Samplers/Sampler.py @@ -547,21 +547,6 @@ def _readInVariable(self, child, prefix): self.toBeSampled[prefix + name] = distName # set up mapping for variable to distribution self.variables2distributionsMapping[name] = varData - # ##### OLD ##### - # # name of the distribution to sample - # toBeSampled = childChild.value - # varData = {} - # varData['name'] = childChild.value - # # variable dimensionality - # if 'dim' not in childChild.parameterValues: - # dim = 1 - # else: - # dim = childChild.parameterValues['dim'] - # varData['dim'] = dim - # # set up mapping for variable to distribution - # self.variables2distributionsMapping[varName] = varData - # # flag distribution as needing to be sampled - # self.toBeSampled[prefix + varName] = toBeSampled elif childChild.getName() == 'function': # can only have a function if doesn't already have a distribution or function if not foundDistOrFunc: @@ -683,9 +668,8 @@ def initialize(self, externalSeeding=None, solutionExport=None): else: self.localInitialize() - for distrib in self.NDSamplingParams: + for distrib, params in self.NDSamplingParams.items(): if distrib in self.distributions2variablesMapping: - params = self.NDSamplingParams[distrib] temp = utils.first(self.distributions2variablesMapping[distrib][0].keys()) try: self.distDict[temp].updateRNGParam(params) @@ -699,15 +683,14 @@ def initialize(self, externalSeeding=None, solutionExport=None): # Store the transformation matrix in the metadata if self.variablesTransformationDict: self.entitiesToRemove = [] - for variable in self.variables2distributionsMapping: - distName = self.variables2distributionsMapping[variable]['name'] - dim = self.variables2distributionsMapping[variable]['dim'] - totDim = self.variables2distributionsMapping[variable]['totDim'] + for variable, data in self.variables2distributionsMapping.items(): + distName = data['name'] + dim = data['dim'] + totDim = data['totDim'] if totDim > 1 and dim == 1: transformDict = {} transformDict['type'] = self.distDict[variable.strip()].type transformDict['transformationMatrix'] = self.distDict[variable.strip()].transformationMatrix() - # FIXME not inputInfo, where should this go? 
self.samplerInfo[f'transformation-{distName}'] = transformDict self.entitiesToRemove.append(f'transformation-{distName}') @@ -923,20 +906,6 @@ def _formNDVariables(self, rlzBatch): @ Out, None """ for rlz in rlzBatch: - # TODO REMOVE - # for baseName, info in self.ndVariables.items(): - # shape = info['shape'] - # # collect all the values from the split variables - # values = [] - # entries = np.zeros(shape).size - # for i in range(entries): - # var = baseName - # if entries > 1: - # var += _vectorPostfixFormat.format(ID=str(i)) - # values.append(self.inputInfo['SampledVars'].pop(var)) - # # shape values into the requested format - # self.inputInfo['SampledVars'][baseName] = np.asarray(values).reshape(shape) - # # TODO does other data need extracting, like probability weights and etc? for baseName, info in self.ndVariables.items(): shape = info['shape'] dims = info['dims'] @@ -985,9 +954,9 @@ def _evaluateFunctionsOrder(self): @ Out, None """ functionsToVariables = {} - for var in self.funcDict: + for var, inst in self.funcDict.items(): outputMatch = [] - functionInputs = self.funcDict[var].instance.parameterNames() + functionInputs = inst.instance.parameterNames() for inpVar in functionInputs: # find functions that are linked to this inpVar if inpVar in self.funcDict: @@ -1039,7 +1008,7 @@ def _incrementCounter(self, numAdded=1): if self.reseedAtEachIteration: randomUtils.randomSeed(self.counters['seeding'] - 1) # FIXME this may be setting the BATCH prefix, not the SAMPLE prefix - # -> so let's move it out of this method + # -> so let's move it out of this method -> TODO REMOVE # self.samplerInfo['prefix'] = str(self.counters['batches']) #FIXME is this useful, or should we be using the counters? def _performVariableTransform(self, rlzBatch): @@ -1103,7 +1072,6 @@ def generateInput(self, model, modelInput): @ Out, modelInput, potentially perturbed? original inputs for model, or None if taken from restart """ if model is not None: - # FIXME does samplerInfo have all the information? It should ... model.getAdditionalInputEdits(self.samplerInfo) ##### GENERATE SAMPLE ##### # instantiate a batch of data carrier realizations @@ -1171,7 +1139,7 @@ def generateInputBatch(self, myInput, model, batchSize, projector=None): @ In, projector, object, optional, used for adaptive sampling to provide the projection of the solution on the success metric @ Out, newInputs, list of list, list of the list of input sets """ - FIXME # used? -> should be moved to using batch system! + DEPRECATE # used? -> should be moved to using batch system! newInputs = [] while self.amIreadyToProvideAnInput() and (self.counters['samples'] < batchSize): if projector is None: @@ -1200,13 +1168,6 @@ def pcaTransform(self, rlz, varsDict, dist): @ In, dist, string, the distribution name associated with given variable set @ Out, None """ - # def _applyTransformation(values): - # """ - # TODO can this just be collapsed down to a single call now without a wrapper?? 
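# --- Editor's sketch (not part of the patch): the gather-and-reshape idea behind
# --- _formNDVariables shown above: scalar samples split out of an ND variable are
# --- collected in index order and reshaped to the declared shape. The '<base>_<i>'
# --- postfix format below is an assumption for illustration, not the RAVEN format.
import numpy as np

def formNDVariable(sampledVars, baseName, shape):
  """Pop the flat entries of one ND variable and rebuild them as a single array."""
  nEntries = int(np.prod(shape))
  values = [sampledVars.pop(f'{baseName}_{i}') for i in range(nEntries)]
  return np.asarray(values).reshape(shape)

flat = {f'H_{i}': float(i) for i in range(6)}
print(formNDVariable(flat, 'H', shape=(2, 3)))  # 2x3 array in row-major order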
- # Wrapper to apply the pca transformation - # @ In, values, dict, dictionary of sampled vars - # @ Out, None # TODO REMOVE values, dict, the updated set of values - # """ latentVariablesValues = [] listIndex = [] manifestVariablesValues = [None] * len(varsDict['manifestVariables']) @@ -1221,7 +1182,6 @@ def pcaTransform(self, rlz, varsDict, dist): manifestVariablesValues[index2] = varsValues[index1] manifestVariablesDict = dict(zip(varsDict['manifestVariables'],manifestVariablesValues)) rlz.update(manifestVariablesDict) - # TODO REMOVE_applyTransformation(rlz) def _checkSample(self, rlz): """ diff --git a/ravenframework/Steps/MultiRun.py b/ravenframework/Steps/MultiRun.py index 96c286e17f..5cfbd92ba1 100644 --- a/ravenframework/Steps/MultiRun.py +++ b/ravenframework/Steps/MultiRun.py @@ -129,7 +129,6 @@ def _localInitializeStep(self, stepEntities): if sampler.amIreadyToProvideAnInput(): try: batch, modelInp = sampler.generateInput(model, inputs) - # OLD batch, modelInp = self._findANewInputToRun(inDictionary[self.samplerType], inDictionary['Model'], inDictionary['Input'], inDictionary['Output'], inDictionary['jobHandler']) model.submit(batch, modelInp, sampler.type, jobHandler) self.raiseAMessage(f'Submitted input batch {inputIndex+1}') except utils.NoMoreSamplesNeeded: diff --git a/ravenframework/Steps/SingleRun.py b/ravenframework/Steps/SingleRun.py index 0c3119735d..51f354c609 100644 --- a/ravenframework/Steps/SingleRun.py +++ b/ravenframework/Steps/SingleRun.py @@ -192,7 +192,6 @@ def _localTakeAstepRun(self, inDictionary): 'additionalEdits': {}, }) model.submit(batch, inputs, None, jobHandler) - # OLD model.submit(inputs, None, jobHandler, **{'SampledVars': {'prefix':'None'}, 'additionalEdits': {}}) # FIXME make this match multirun, and maybe share the code? 
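# --- Editor's sketch (not part of the patch): the generate-then-submit loop that the
# --- MultiRun change above follows, with toy stand-ins for the sampler, model and job
# --- handler. Only the call pattern (a whole batch per submission) mirrors the diff.
class _SketchSampler:
  def __init__(self, nSamples, batchSize):
    self.type = 'SketchSampler'
    self._remaining = nSamples
    self._batchSize = batchSize

  def amIreadyToProvideAnInput(self):
    return self._remaining > 0

  def generateInput(self, model, inputs):
    size = min(self._batchSize, self._remaining)
    self._remaining -= size
    batch = [{'prefix': i} for i in range(size)]
    return batch, inputs

class _SketchModel:
  def submit(self, batch, modelInput, samplerType, jobHandler):
    jobHandler.extend(batch)  # queue the whole batch at once

sampler, model, jobQueue = _SketchSampler(nSamples=5, batchSize=2), _SketchModel(), []
while sampler.amIreadyToProvideAnInput():
  batch, modelInp = sampler.generateInput(model, inputs=None)
  model.submit(batch, modelInp, sampler.type, jobQueue)
print(len(jobQueue))  # 5 jobs queued in batches of 2, 2 and 1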
while True: finishedJobs = jobHandler.getFinished() From 0bd7f0d273642ee63fd7140ab5eeac33b84b3636 Mon Sep 17 00:00:00 2001 From: talbpw Date: Wed, 18 Dec 2024 12:57:08 -0700 Subject: [PATCH 16/18] converted samplers to using limits, counters --- .../Samplers/AdaptiveDynamicEventTree.py | 5 ++- ravenframework/Samplers/AdaptiveMonteCarlo.py | 10 +++--- ravenframework/Samplers/CustomSampler.py | 12 +++---- ravenframework/Samplers/DynamicEventTree.py | 14 ++++---- ravenframework/Samplers/EnsembleForward.py | 10 +++--- ravenframework/Samplers/FactorialDesign.py | 4 +-- ravenframework/Samplers/Grid.py | 4 +-- ravenframework/Samplers/LimitSurfaceSearch.py | 8 ++--- ravenframework/Samplers/MonteCarlo.py | 7 ++-- .../Samplers/ResponseSurfaceDesign.py | 6 ++-- ravenframework/Samplers/Sobol.py | 6 ++-- .../Samplers/SparseGridCollocation.py | 6 ++-- ravenframework/Samplers/Stratified.py | 32 +++++++++++++++---- 13 files changed, 70 insertions(+), 54 deletions(-) diff --git a/ravenframework/Samplers/AdaptiveDynamicEventTree.py b/ravenframework/Samplers/AdaptiveDynamicEventTree.py index 7de724206b..c6fc87a9fe 100644 --- a/ravenframework/Samplers/AdaptiveDynamicEventTree.py +++ b/ravenframework/Samplers/AdaptiveDynamicEventTree.py @@ -117,7 +117,6 @@ def _localWhatDoINeed(self): @ In, None @ Out, needDict, dict, dictionary listing needed objects """ - #adaptNeedInst = self.limitSurfaceInstances.values()[-1]._localWhatDoINeed() needDict = dict(itertools.chain(LimitSurfaceSearch._localWhatDoINeed(self).items(),DynamicEventTree._localWhatDoINeed(self).items())) return needDict @@ -270,7 +269,7 @@ def _constructEndInfoFromBranch(self, rlz, model, myInput, info, cdfValues): @ Out, None """ endInfo = info['parentNode'].get('endInfo') - self.counter += 1 + self.counters['samples'] += 1 self.branchCountOnLevel = info['actualBranchOnLevel']+1 # Get Parent node name => the branch name is creating appending to this name a comma and self.branchCountOnLevel counter rname = info['parentNode'].get('name') + '-' + str(self.branchCountOnLevel) @@ -373,7 +372,7 @@ def localStillReady(self,ready): @ In, ready, bool, a boolean representing whether the caller is prepared for another input. @ Out, ready, bool, a boolean representing whether the caller is prepared for another input. 
""" - if self.counter == 0: + if self.counters['samples'] == 0: return True if len(self.RunQueue['queue']) != 0: detReady = True diff --git a/ravenframework/Samplers/AdaptiveMonteCarlo.py b/ravenframework/Samplers/AdaptiveMonteCarlo.py index b97996a5d1..1202250d95 100644 --- a/ravenframework/Samplers/AdaptiveMonteCarlo.py +++ b/ravenframework/Samplers/AdaptiveMonteCarlo.py @@ -96,7 +96,7 @@ def __init__(self): AdaptiveSampler.__init__(self) self.persistence = 5 # this is the number of times the error needs to fell below the tolerance before considering the sim converged self.persistenceCounter = 0 # Counter for the persistence - self.forceIteration = False # flag control if at least a self.limit number of iteration should be done + self.forceIteration = False # flag control if at least a self.limits[samples] number of iteration should be done self.solutionExport = None # data used to export the solution (it could also not be present) self.tolerance = {} # dictionary stores the tolerance for each variables self.basicStatPP = None # post-processor to compute the basic statistics @@ -117,7 +117,7 @@ def localInputAndChecks(self,xmlNode, paramInput): for grandchild in child.subparts: tag = grandchild.getName() if tag == "limit": - self.limit = grandchild.value + self.limits['samples'] = grandchild.value elif tag == "persistence": self.persistence = grandchild.value self.raiseADebug(f'Persistence is set at {self.persistence}') @@ -149,7 +149,7 @@ def localInputAndChecks(self,xmlNode, paramInput): for target in info['targets']: metaVar = prefix + '_ste_' + target self.tolerance[metaVar] = info['tol'] - if self.limit is None: + if self.limits['samples'] is None: self.raiseAnError(IOError, f'{self.type} requires a to be specified!') def localInitialize(self, solutionExport=None): @@ -186,9 +186,9 @@ def localFinalizeActualSampling(self, jobObject, model, myInput): @ In, myInput, list, the generating input @ Out, None """ - if self.counter > 1: + if self.counters['samples'] > 1: output = self.basicStatPP.run(self._targetEvaluation) - output['solutionUpdate'] = np.asarray([self.counter - 1]) + output['solutionUpdate'] = np.asarray([self.counters['samples'] - 1]) self._solutionExport.addRealization(output) self.checkConvergence(output) diff --git a/ravenframework/Samplers/CustomSampler.py b/ravenframework/Samplers/CustomSampler.py index 6060285ca9..ed581f741b 100644 --- a/ravenframework/Samplers/CustomSampler.py +++ b/ravenframework/Samplers/CustomSampler.py @@ -205,7 +205,7 @@ def localInitialize(self): else: self.infoFromCustom['ProbabilityWeight'] = np.ones(lenRlz) - self.limit = len(utils.first(self.pointsToSample.values())) + self.limits['samples'] = len(utils.first(self.pointsToSample.values())) else: self.readingFrom = 'DataObject' dataObj = self.assemblerDict['Source'][0][3] @@ -217,11 +217,11 @@ def localInitialize(self): sourceName = self.nameInSource[subVar] if sourceName not in dataObj.getVars() + dataObj.getVars('indexes'): self.raiseAnError(IOError, f"the variable {sourceName} not found in {dataObj.type} {dataObj.name}") - self.limit = len(self.pointsToSample) + self.limits['samples'] = len(self.pointsToSample) self.sourceIndexMap = dataObj.getDimensions() # if "index" provided, limit sampling to those points if self.indexes is not None: - self.limit = len(self.indexes) + self.limits['samples'] = len(self.indexes) maxIndex = max(self.indexes) if maxIndex > len(self.pointsToSample) - 1: self.raiseAnError(IndexError, f'Requested index "{maxIndex}" from custom sampler, but highest index 
sample is "{len(self.pointsToSample) - 1}"!') @@ -252,10 +252,10 @@ def localGenerateInput(self, rlzBatch, model, myInput): batchMode = False for rlz in rlzBatch: if self.indexes is None: - index = self.counter - 1 + index = self.counters['samples'] - 1 else: - index = self.indexes[self.counter-1] - if self.counter == self.limit + 1: + index = self.indexes[self.counters['samples']-1] + if self.counters['samples'] == self.limits['samples'] + 1: break if self.readingFrom == 'DataObject': # data is stored as slices of a data object, so take from that diff --git a/ravenframework/Samplers/DynamicEventTree.py b/ravenframework/Samplers/DynamicEventTree.py index 55ea3223b0..877f9ed5a0 100644 --- a/ravenframework/Samplers/DynamicEventTree.py +++ b/ravenframework/Samplers/DynamicEventTree.py @@ -182,7 +182,7 @@ def localStillReady(self, ready): @ Out, ready, bool, a boolean representing whether the caller is prepared for another input. """ self._endJobRunnable = max((len(self.RunQueue['queue']),1)) - if(len(self.RunQueue['queue']) != 0 or self.counter == 0): + if(len(self.RunQueue['queue']) != 0 or self.counters['samples'] == 0): ready = True else: if self.printEndXmlSummary: @@ -512,7 +512,7 @@ def _createRunningQueueBeginOne(self, rlz, rootTree, branchedLevel, model, myInp self.RunQueue['identifiers'].append(rlz.inputInfo['prefix']) self.rootToJob[rlz.inputInfo['prefix']] = rname del newInputs - self.counter += 1 + self.counters['samples'] += 1 def _createRunningQueueBegin(self, rlz, model, myInput): """ @@ -567,7 +567,7 @@ def _createRunningQueueBranch(self, rlz, model, myInput, forceEvent=False): nBranches -= 1 # Loop over the branches for which the inputs must be created for _ in range(nBranches): - self.counter += 1 + self.counters['samples'] += 1 self.branchCountOnLevel += 1 branchedLevel = copy.deepcopy(branchedLevelParent) # Get Parent node name => the branch name is creating appending to this name a comma and self.branchCountOnLevel counter @@ -717,7 +717,7 @@ def _createRunningQueue(self, rlz, model, myInput, forceEvent=False): @ In, forceEvent, bool, True if a branching needs to be forced @ Out, None """ - if self.counter >= 1: + if self.counters['samples'] >= 1: # The first DET calculation branch has already been run # Start the manipulation: # Pop out the last endInfo information and the branchedLevel @@ -813,7 +813,7 @@ def localGenerateInput(self, rlz, model, modelInput): @ In, modelInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.) @ Out, newerInput, list, list of new inputs """ - if self.counter <= 1: + if self.counters['samples'] <= 1: # If first branch input, create the queue self._createRunningQueue(rlz, model, modelInput) # retrieve the input from the queue @@ -984,7 +984,7 @@ def localInitialize(self): hybridsampler.initialize() self.hybridNumberSamplers *= hybridsampler.limit while hybridsampler.amIreadyToProvideAnInput(): - hybridsampler.counter +=1 + hybridsampler.counters['samples'] +=1 hybridsampler.localGenerateInput(None,None) hybridsampler._constantVariables() ##### REDUNDANT FUNCTIONALS ##### @@ -1060,6 +1060,6 @@ def localInitialize(self): else: self.branchValues[key] = np.append(self.branchValues[key],self.distDict[key].inverseMarginalDistribution(1.0,self.variables2distributionsMapping[key]['dim']-1)) #self.branchValues[key].append(self.distDict[key].inverseMarginalDistribution(1.0,self.variables2distributionsMapping[key]['dim']-1) ) - self.limit = sys.maxsize + self.limits['samples'] = sys.maxsize # ??? why? 
# add expected metadata self.addMetaKeys(['RAVEN_parentID','RAVEN_isEnding','conditionalPb','triggeredVariable','happenedEvent']) diff --git a/ravenframework/Samplers/EnsembleForward.py b/ravenframework/Samplers/EnsembleForward.py index 67ab503fa3..afdb02e18b 100644 --- a/ravenframework/Samplers/EnsembleForward.py +++ b/ravenframework/Samplers/EnsembleForward.py @@ -181,7 +181,7 @@ def localInitialize(self): @ In, None @ Out, None """ - self.limit = 1 + self.limits['samples'] = 1 cnt = 0 lowerBounds = {} upperBounds = {} @@ -190,12 +190,12 @@ def localInitialize(self): for samplingStrategy, sampler in self.instantiatedSamplers.items(): sampler.initialize(externalSeeding=self.initSeed, solutionExport=None) self.samplersCombinations[samplingStrategy] = [] - self.limit *= sampler.limit + self.limits['samples'] *= sampler.limits['samples'] lowerBounds[samplingStrategy] = 0 - upperBounds[samplingStrategy] = sampler.limit + upperBounds[samplingStrategy] = sampler.limits['samples'] while sampler.amIreadyToProvideAnInput(): rlz = Realization() - sampler.counter += 1 + sampler.counters['samples'] += 1 sampler.localGenerateInput(rlz, None, None) rlz.inputInfo['prefix'] = sampler.counter self.samplersCombinations[samplingStrategy].append(copy.deepcopy(rlz.asDict())) @@ -204,7 +204,7 @@ def localInitialize(self): metadataKeys.extend(mKeys) metaParams.update(mParams) metadataKeys = list(set(metadataKeys)) - self.raiseAMessage(f'Number of Combined Samples are {self.limit}!') + self.raiseAMessage(f'Total Number of Combined Samples is {self.limits["samples"]}!') # create a grid of combinations (no tensor) self.gridEnsemble = GridEntities.factory.returnInstance('GridEntity') initDict = {'dimensionNames': self.instantiatedSamplers.keys(), diff --git a/ravenframework/Samplers/FactorialDesign.py b/ravenframework/Samplers/FactorialDesign.py index 9bfea62082..731f69c6e3 100644 --- a/ravenframework/Samplers/FactorialDesign.py +++ b/ravenframework/Samplers/FactorialDesign.py @@ -147,7 +147,7 @@ def localInitialize(self): self.designMatrix = doe.pbdesign(len(self.gridInfo.keys())).astype(int) if self.designMatrix is not None: self.designMatrix[self.designMatrix == -1] = 0 # convert all -1 in 0 => we can access to the grid info directly - self.limit = self.designMatrix.shape[0] # the limit is the number of rows + self.limits['samples'] = self.designMatrix.shape[0] # the limit is the number of rows def localGenerateInput(self, rlz, model, myInput): """ @@ -163,7 +163,7 @@ def localGenerateInput(self, rlz, model, myInput): if self.factOpt['algorithmType'] == 'full': Grid.localGenerateInput(self, rlz, model, myInput) else: - self.gridCoordinate = self.designMatrix[self.counter - 1][:].tolist() + self.gridCoordinate = self.designMatrix[self.counters['samples'] - 1][:].tolist() Grid.localGenerateInput(self, rlz, model, myInput) # # diff --git a/ravenframework/Samplers/Grid.py b/ravenframework/Samplers/Grid.py index f4df50f5e7..40f7889d7b 100644 --- a/ravenframework/Samplers/Grid.py +++ b/ravenframework/Samplers/Grid.py @@ -91,7 +91,7 @@ def localInputAndChecks(self, xmlNode, paramInput): #TODO remove using xmlNode if 'limit' in paramInput.parameterValues: self.raiseAnError(IOError,'limit is not used in Grid sampler') - self.limit = 1 + self.limits['samples'] = 1 self.gridEntity._handleInput(paramInput, dimensionTags=["variable", "Distribution"], dimTagsPrefix={"Distribution": ""}) grdInfo = self.gridEntity.returnParameter("gridInfo") @@ -143,7 +143,7 @@ def localInitialize(self): @ Out, None """ 
self.gridEntity.initialize() - self.limit = self.gridEntity.len() + self.limits['samples'] = self.gridEntity.len() def localGenerateInput(self, rlz, model, oldInput): """ diff --git a/ravenframework/Samplers/LimitSurfaceSearch.py b/ravenframework/Samplers/LimitSurfaceSearch.py index 36d265fd32..c3fd6fe2bc 100644 --- a/ravenframework/Samplers/LimitSurfaceSearch.py +++ b/ravenframework/Samplers/LimitSurfaceSearch.py @@ -212,7 +212,7 @@ def localInputAndChecks(self,xmlNode, paramInput): #TODO remove using xmlNode if 'limit' in xmlNode.attrib.keys(): try: - self.limit = int(xmlNode.attrib['limit']) + self.limits['samples'] = int(xmlNode.attrib['limit']) except ValueError: self.raiseAnError(IOError,'reading the attribute for the sampler '+self.name+' it was not possible to perform the conversion to integer for the attribute limit with value '+xmlNode.attrib['limit']) # convergence Node @@ -228,7 +228,7 @@ def localInputAndChecks(self,xmlNode, paramInput): if 'limit' in convergenceNode.attrib.keys(): attribList.pop(attribList.index('limit')) try: - self.limit = int (convergenceNode.attrib['limit']) + self.limits['samples'] = int (convergenceNode.attrib['limit']) except: self.raiseAnError(IOError,'Failed to convert the limit value '+convergenceNode.attrib['limit']+' to a meaningful number for the convergence') if 'persistence' in convergenceNode.attrib.keys(): @@ -506,7 +506,7 @@ def localStillReady(self,ready): self.converged = True if not self.limitSurfacePP.crossedLimitSurf: self.raiseAWarning("THE LIMIT SURFACE has NOT been crossed. The search FAILED!!!") - self.raiseAMessage('counter: '+str(self.counter)+' Error: {:9.6E} Repetition: {:5d}'.format(testError,self.repetition) ) + self.raiseAMessage(f'counter: {self.counters["samples"]} Error: {testError:9.6E} Repetition: {self.repetition:5d}') #if the number of point on the limit surface is > than compute persistence realAxisNames, cnt = [key.replace('','') for key in self.axisName], 0 if self.solutionExport is not None: @@ -755,7 +755,7 @@ def localGenerateInput(self, rlz, model, oldInput): # the probability weight here is not used, the post processor is going to recreate the grid associated and use a ROM for the probability evaluation rlz.inputInfo['ProbabilityWeight'] = rlz.inputInfo['PointProbability'] self.hangingPoints = np.vstack((self.hangingPoints,copy.copy(np.array([rlz[axis] for axis in self.axisName])))) - self.raiseADebug('At counter '+str(self.counter)+' the generated sampled variables are: '+str(rlz)) + self.raiseADebug(f'At counter {self.counters["samples"]} the generated sampled variables are: {rlz}') rlz.inputInfo['SamplerType'] = 'LimitSurfaceSearch' rlz.inputInfo['subGridTol' ] = self.subGridTol diff --git a/ravenframework/Samplers/MonteCarlo.py b/ravenframework/Samplers/MonteCarlo.py index 3ef03053f0..862b844885 100644 --- a/ravenframework/Samplers/MonteCarlo.py +++ b/ravenframework/Samplers/MonteCarlo.py @@ -77,7 +77,6 @@ def __init__(self): super().__init__() self.printTag = 'SAMPLER MONTECARLO' self.samplingType = None - self.limit = None def localInputAndChecks(self, xmlNode, paramInput): """ @@ -89,7 +88,7 @@ def localInputAndChecks(self, xmlNode, paramInput): # TODO remove using xmlNode Sampler.readSamplerInit(self, xmlNode) if paramInput.findFirst('samplerInit') is not None: - if self.limit is None: + if self.limits['samples'] is None: self.raiseAnError(IOError,self, f'Monte Carlo sampler {self.name} needs the limit block (number of samples) in the samplerInit block') if 
paramInput.findFirst('samplerInit').findFirst('samplingType') is not None:
         self.samplingType = paramInput.findFirst('samplerInit').findFirst('samplingType').value
@@ -137,8 +136,8 @@ def localGenerateInput(self, rlz, model, modelInput):
           upper = distData['xMax']
           rvsnum = lower + (upper - lower) * randomUtils.random()
           # TODO (wangc): I think the calculation for epsilon need to be updated as following
-          # epsilon = (upper-lower)/(self.limit+1) * 0.5
-          epsilon = (upper-lower)/self.limit
+          # epsilon = (upper-lower)/(self.limits['samples']+1) * 0.5
+          epsilon = (upper-lower)/self.limits['samples']
           midPlusCDF = self.distDict[key].cdf(rvsnum + epsilon)
           midMinusCDF = self.distDict[key].cdf(rvsnum - epsilon)
           weight *= midPlusCDF - midMinusCDF
diff --git a/ravenframework/Samplers/ResponseSurfaceDesign.py b/ravenframework/Samplers/ResponseSurfaceDesign.py
index ff1917344e..54b9a40b5a 100644
--- a/ravenframework/Samplers/ResponseSurfaceDesign.py
+++ b/ravenframework/Samplers/ResponseSurfaceDesign.py
@@ -67,7 +67,7 @@ def __init__(self):
       @ Out, None
     """
     Grid.__init__(self)
-    self.limit = 1
+    self.limits['samples'] = 1
     self.printTag = 'SAMPLER RESPONSE SURF DESIGN'
     self.respOpt = {}         # response surface design options (type,etc)
     self.designMatrix = None  # matrix container
@@ -181,7 +181,7 @@ def localInitialize(self):
     self.gridEntity.updateParameter("stepLength", stepLength, False)
     self.gridEntity.updateParameter("gridInfo", gridInfo)
     Grid.localInitialize(self)
-    self.limit = self.designMatrix.shape[0]
+    self.limits['samples'] = self.designMatrix.shape[0]

   def localGenerateInput(self, rlz, model, myInput):
     """
@@ -194,7 +194,7 @@ def localGenerateInput(self, rlz, model, myInput):
       @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.)
       @ Out, None
     """
-    gridcoordinate = self.designMatrix[self.counter - 1][:].tolist()
+    gridcoordinate = self.designMatrix[self.counters['samples'] - 1][:].tolist()
     for cnt, varName in enumerate(self.axisName):
       self.gridCoordinate[cnt] = self.mapping[varName].index(gridcoordinate[cnt])
     Grid.localGenerateInput(self, rlz, model, myInput)
diff --git a/ravenframework/Samplers/Sobol.py b/ravenframework/Samplers/Sobol.py
index ba6b869be3..934f027efc 100644
--- a/ravenframework/Samplers/Sobol.py
+++ b/ravenframework/Samplers/Sobol.py
@@ -160,8 +160,8 @@ def localInitialize(self):
         self.distinctPoints.add(newpt)
         if newpt not in self.pointsToRun:
           self.pointsToRun.append(newpt)
-    self.limit = len(self.pointsToRun)
-    self.raiseADebug(f'Needed points: {self.limit}')
+    self.limits['samples'] = len(self.pointsToRun)
+    self.raiseADebug(f"Needed points: {self.limits['samples']}")
     initdict={'ROMs':self.ROMs,
               'SG':self.SQs,
               'dists':self.dists,
@@ -181,7 +181,7 @@ def localGenerateInput(self, rlz, model, myInput):
       @ Out, None
     """
     try:
-      pt = self.pointsToRun[self.counter-1]
+      pt = self.pointsToRun[self.counters['samples']-1]
     except IndexError as ie:
       self.raiseADebug('All sparse grids are complete!  Moving on...')
       raise utils.NoMoreSamplesNeeded from ie
diff --git a/ravenframework/Samplers/SparseGridCollocation.py b/ravenframework/Samplers/SparseGridCollocation.py
index eb9c4fb770..3eea123269 100644
--- a/ravenframework/Samplers/SparseGridCollocation.py
+++ b/ravenframework/Samplers/SparseGridCollocation.py
@@ -194,8 +194,8 @@ def localInitialize(self):
       outFile.writelines(msg)
       outFile.close()

-    self.limit=len(self.sparseGrid)
-    self.raiseADebug(f'Size of Sparse Grid: {self.limit}')
+    self.limits['samples'] = len(self.sparseGrid)
+    self.raiseADebug(f"Size of Sparse Grid: {self.limits['samples']}")
     self.raiseADebug('Finished sampler generation.')

     self.raiseADebug('indexset:',self.indexSet)
@@ -279,7 +279,7 @@ def localGenerateInput(self, rlz, model, oldInput):
       @ Out, None
     """
     try:
-      pt, weight = self.sparseGrid[self.counter-1]
+      pt, weight = self.sparseGrid[self.counters['samples'] - 1]
     except IndexError as ie:
       raise utils.NoMoreSamplesNeeded from ie

diff --git a/ravenframework/Samplers/Stratified.py b/ravenframework/Samplers/Stratified.py
index 5c46a76532..b40e7ae5e1 100644
--- a/ravenframework/Samplers/Stratified.py
+++ b/ravenframework/Samplers/Stratified.py
@@ -114,7 +114,7 @@ def localInitialize(self):
       @ Out, None
     """
     Grid.localInitialize(self)
-    self.limit = (self.pointByVar-1)
+    self.limits['samples'] = (self.pointByVar-1)
     # For the multivariate normal distribtuion, if the user generates the grids on the transformed space, the user needs to provide the grid for each variables, no globalGrid is needed
     if self.variablesTransformationDict:
       tempFillingCheck = [[None]*(self.pointByVar-1)]*len(self.gridEntity.returnParameter("dimensionNames")) #for all variables
@@ -191,8 +191,14 @@ def localGenerateInput(self, rlz, model, oldInput):
       for var in sorted_mapping:
         # if the varName is a comma separated list of strings the user wants to sample the comma separated variables with the same sampled value => link the value to all comma separated variables
         variable, position = var
-        upper = self.gridEntity.returnShiftedCoordinate(self.gridEntity.returnIteratorIndexes(),{variable:self.sampledCoordinate[self.counter-1][varCount]+1})[variable]
-        lower = self.gridEntity.returnShiftedCoordinate(self.gridEntity.returnIteratorIndexes(),{variable:self.sampledCoordinate[self.counter-1][varCount]})[variable]
+        upper = self.gridEntity.returnShiftedCoordinate(
+          self.gridEntity.returnIteratorIndexes(),
+          {variable:self.sampledCoordinate[self.counters['samples']-1][varCount]+1}
+        )[variable]
+        lower = self.gridEntity.returnShiftedCoordinate(
+          self.gridEntity.returnIteratorIndexes(),
+          {variable:self.sampledCoordinate[self.counters['samples']-1][varCount]}
+        )[variable]
         varCount += 1
         if self.gridInfo[variable] == 'CDF':
           coordinate = lower + (upper-lower)*randomUtils.random()
@@ -217,8 +223,14 @@ def localGenerateInput(self, rlz, model, oldInput):
             rlz.inputInfo['SampledVarsPb'][varName] = self.distDict[varName].pdf(ndCoordinate)
       else:
         if self.gridInfo[varName] == 'CDF':
-          upper = self.gridEntity.returnShiftedCoordinate(self.gridEntity.returnIteratorIndexes(),{varName:self.sampledCoordinate[self.counter-1][varCount]+1})[varName]
-          lower = self.gridEntity.returnShiftedCoordinate(self.gridEntity.returnIteratorIndexes(),{varName:self.sampledCoordinate[self.counter-1][varCount]})[varName]
+          upper = self.gridEntity.returnShiftedCoordinate(
+            self.gridEntity.returnIteratorIndexes(),
+            {varName:self.sampledCoordinate[self.counters['samples']-1][varCount]+1}
+          )[varName]
+          lower = 
self.gridEntity.returnShiftedCoordinate( + self.gridEntity.returnIteratorIndexes(), + {varName:self.sampledCoordinate[self.counters['samples']-1][varCount]} + )[varName] varCount += 1 coordinate = lower + (upper-lower)*randomUtils.random() gridCoordinate, distName = self.distDict[varName].ppf(coordinate), self.variables2distributionsMapping[varName]['name'] @@ -235,8 +247,14 @@ def localGenerateInput(self, rlz, model, oldInput): if ("" in varName) or self.variables2distributionsMapping[varName]['totDim']==1: # 1D variable # if the varName is a comma separated list of strings the user wants to sample the comma separated variables with the same sampled value => link the value to all comma separated variables - upper = self.gridEntity.returnShiftedCoordinate(self.gridEntity.returnIteratorIndexes(),{varName:self.sampledCoordinate[self.counter-1][varCount]+1})[varName] - lower = self.gridEntity.returnShiftedCoordinate(self.gridEntity.returnIteratorIndexes(),{varName:self.sampledCoordinate[self.counter-1][varCount]})[varName] + upper = self.gridEntity.returnShiftedCoordinate( + self.gridEntity.returnIteratorIndexes(), + {varName:self.sampledCoordinate[self.counters['samples']-1][varCount]+1} + )[varName] + lower = self.gridEntity.returnShiftedCoordinate( + self.gridEntity.returnIteratorIndexes(), + {varName:self.sampledCoordinate[self.counters['samples']-1][varCount]} + )[varName] varCount += 1 if self.gridInfo[varName] =='CDF': coordinate = lower + (upper-lower)*randomUtils.random() From 38c144a62d93e87485c08f62b9be4e9a8f3242b6 Mon Sep 17 00:00:00 2001 From: talbpw Date: Wed, 18 Dec 2024 13:01:24 -0700 Subject: [PATCH 17/18] converted optimizers to using counters, limits --- plugins/HERON | 2 +- .../Optimizers/BayesianOptimizer.py | 4 ++-- ravenframework/Optimizers/GeneticAlgorithm.py | 6 +++--- ravenframework/Optimizers/Optimizer.py | 6 ++++-- .../DatabaseStorage/testGridRavenDatabase.h5 | Bin 46668 -> 46668 bytes 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/plugins/HERON b/plugins/HERON index 71152d13f9..cfc5d064eb 160000 --- a/plugins/HERON +++ b/plugins/HERON @@ -1 +1 @@ -Subproject commit 71152d13f9da46f8d9ddca72e843eae15b4a3879 +Subproject commit cfc5d064ebce0f52b2d4d87199e3865bd459dc95 diff --git a/ravenframework/Optimizers/BayesianOptimizer.py b/ravenframework/Optimizers/BayesianOptimizer.py index 948a49abc8..e3d8c0e283 100644 --- a/ravenframework/Optimizers/BayesianOptimizer.py +++ b/ravenframework/Optimizers/BayesianOptimizer.py @@ -321,7 +321,7 @@ def _useRealization(self, info, rlz): if not isinstance(rlz, dict): if step == 1: self.batch = 1 # FIXME when implementing parallel expected improvement, fix this - self.counter -= 1 #FIXME hacky way to make sure iterations are correctly counted + self.counters['samples'] -= 1 #FIXME hacky way to make sure iterations are correctly counted self.raiseAMessage(f'Initialization data of dimension {self._initialSampleSize} received... 
' f'Setting sample batch size to {self.batch}') else: @@ -746,7 +746,7 @@ def _updateConvergence(self, traj, new, old, acceptable): # No point in checking convergence if no feasible point has been found if len(self._optPointHistory[0]) == 0: converged = False - elif self.getIteration(traj) < self.limit: + elif self.getIteration(traj) < self.limits['samples']: converged = self.checkConvergence(traj, new, old) else: converged = True diff --git a/ravenframework/Optimizers/GeneticAlgorithm.py b/ravenframework/Optimizers/GeneticAlgorithm.py index 32bb21dbaf..7b25f13bb8 100644 --- a/ravenframework/Optimizers/GeneticAlgorithm.py +++ b/ravenframework/Optimizers/GeneticAlgorithm.py @@ -501,7 +501,7 @@ def _useRealizationBatch(self, meta, rlz): if self._activeTraj: # 5.2@ n-1: Survivor selection(rlz) # update population container given obtained children - if self.counter > 1: + if self.counters['samples'] > 1: self.population,self.fitness,age,self.objectiveVal = self._survivorSelectionInstance(age=self.popAge, variables=list(self.toBeSampled), population=self.population, @@ -644,7 +644,7 @@ def _resolveNewGeneration(self, traj, rlz, objectiveVal, fitness, g, info): self.raiseADebug(f'Trajectory {traj} iteration {info["step"]} resolving new state ...') # note the collection of the opt point self._stepTracker[traj]['opt'] = (rlz, info) - acceptable = 'accepted' if self.counter > 1 else 'first' + acceptable = 'accepted' if self.counters['samples'] > 1 else 'first' old = self.population converged = self._updateConvergence(traj, rlz, old, acceptable) if converged: @@ -688,7 +688,7 @@ def _collectOptPoint(self, rlz, fitness, objectiveVal, g): key=lambda x: (x[1]))]) point = dict((var,optPoints[0][i]) for i, var in enumerate(selVars) if var in rlz.data_vars) gOfBest = dict(('ConstraintEvaluation_'+name,float(gOfBest[0][i])) for i, name in enumerate(g.coords['Constraint'].values)) - if (self.counter > 1 and obj[0] <= self.bestObjective and fit[0] >= self.bestFitness) or self.counter == 1: + if (self.counters['samples'] > 1 and obj[0] <= self.bestObjective and fit[0] >= self.bestFitness) or self.counters['samples'] == 1: point.update(gOfBest) self.bestPoint = point self.bestFitness = fit[0] diff --git a/ravenframework/Optimizers/Optimizer.py b/ravenframework/Optimizers/Optimizer.py index 4202f6ea75..cc5720ab3d 100644 --- a/ravenframework/Optimizers/Optimizer.py +++ b/ravenframework/Optimizers/Optimizer.py @@ -413,8 +413,10 @@ def _initializeInitSampler(self, externalSeeding): numTraj = len(self._initialValues) if self._initialValues else None # if there are already-initialized variables (i.e. 
not sampled, but given), then check num samples if numTraj: - if numTraj != self._initSampler.limit: - self.raiseAnError(IOError, f'{numTraj} initial points have been given, but Initialization Sampler "{self._initSampler.name}" provides {self._initSampler.limit} samples!') + if numTraj != self._initSampler.limits['samples']: + self.raiseAnError(IOError, + f'{numTraj} initial points have been given, but Initialization Sampler ' +\ + f'"{self._initSampler.name}" provides {self._initSampler.limits["samples"]} samples!') else: numTraj = self._initSampler.limit self._initialValues = [{} for _ in range(numTraj)] diff --git a/tests/framework/DataObjects/DataobjectsAttributes/DatabaseStorage/testGridRavenDatabase.h5 b/tests/framework/DataObjects/DataobjectsAttributes/DatabaseStorage/testGridRavenDatabase.h5 index ea9775b814c9e284de64c23fd4efe8a1206d0fb3..22f854028d2e67c5f30b1ef7a0b9509114419b82 100644 GIT binary patch delta 44 ycmX@}hUv^3rU^-mY#WnxnHeoMPh^&6VPu%RmR+2kg#iQ0NMY{>;lZ}0+3<>2eD%<1Cu0x5VN2I3 Date: Wed, 18 Dec 2024 15:25:17 -0700 Subject: [PATCH 18/18] fixed createNewInput in framework test externalmodels --- .../dataObject_metadata/dataObjectTest.py | 2 -- .../limitSurfaceTestExternalModel.py | 3 --- .../Models/External/AllMethods/attenuate.py | 11 +++++++++-- tests/framework/NDGridPWcdf/lorentzAttractor.py | 11 +++++++++-- tests/framework/NDGridPWvalue/lorentzAttractor.py | 11 +++++++++-- .../framework/ND_test_Grid_cdf/lorentzAttractor.py | 11 +++++++++-- .../ND_test_Grid_value/lorentzAttractor.py | 11 +++++++++-- tests/framework/ND_test_LHS/lorentzAttractor.py | 12 +++++++++--- tests/framework/ND_test_MC/lorentzAttractor.py | 13 ++++++++++--- .../ND_test_MC_MVN/lorentzAttractor_noK.py | 4 ---- .../basicStatisticsExternalModelTest.py | 2 -- .../basicStatisticsExternalModelTest.py | 2 -- .../basicStatisticsExternalModelTest.py | 2 -- .../basicStatisticsExternalModelTest.py | 2 -- .../economicRatioExternalModelTest.py | 2 -- .../limitSurface/limitSurfaceTestExternalModel.py | 3 --- .../limitSurfaceTestExternalModel.py | 3 --- .../RavenOutputPostProcessor/dynamic/time_model.py | 10 +++++++++- .../transform/lorentzAttractor_noK.py | 12 +++++++++--- tests/framework/dists_vars/dist_var.py | 2 -- .../pcaSampler/gridPCAIndex/lorentzAttractor_noK.py | 4 ---- .../pcaSampler/multiPCA/lorentzAttractor_noK.py | 4 ---- .../pcaSampler/pcaIndex/lorentzAttractor_noK.py | 4 ---- tests/framework/pca_LHS/LHS/lorentzAttractor_noK.py | 4 ---- .../pca_LHS/LHSCdf/lorentzAttractor_noK.py | 3 --- .../pca_LHS/LHSValue/lorentzAttractor_noK.py | 3 --- .../pca_LHS/LHScorrelated/lorentzAttractor_noK.py | 3 --- .../pca_LHS/LHSuncorrelated/lorentzAttractor_noK.py | 3 --- .../pca_LHS/stratifiedCdf/lorentzAttractor_noK.py | 3 --- .../pca_LHS/stratifiedValue/lorentzAttractor_noK.py | 3 --- .../pca_grid/gridPCACdf/lorentzAttractor_noK.py | 4 ---- .../pca_grid/gridPCAValue/lorentzAttractor_noK.py | 4 ---- .../pca_grid/mcPCA/lorentzAttractor_noK.py | 4 ---- .../pca_rom/MVNAbs/lorentzAttractor_noK.py | 3 --- .../pca_rom/MVNRel/lorentzAttractor_noK.py | 3 --- .../pca_rom/PCAReduction/lorentzAttractor_noK.py | 3 --- .../pca_rom/oneDimPCA/lorentzAttractor_noK.py | 3 --- 37 files changed, 82 insertions(+), 105 deletions(-) diff --git a/tests/framework/DataObjects/dataObject_metadata/dataObjectTest.py b/tests/framework/DataObjects/dataObject_metadata/dataObjectTest.py index 6746858554..1394383118 100644 --- a/tests/framework/DataObjects/dataObject_metadata/dataObjectTest.py +++ 
b/tests/framework/DataObjects/dataObject_metadata/dataObjectTest.py
@@ -32,8 +32,6 @@ def initialize(self,runInfoDict,inputFiles):
   self.cnt = 0.0
   return

-def createNewInput(self,myInput,samplerType,**Kwargs): return Kwargs['SampledVars']
-
 def run(self,Input):
   self.cnt = 1.0
   self.x0 = 1.0
diff --git a/tests/framework/InternalParallelTests/InternalParallelPostProcessorLS/limitSurfaceTestExternalModel.py b/tests/framework/InternalParallelTests/InternalParallelPostProcessorLS/limitSurfaceTestExternalModel.py
index 75c20d5713..1389851b59 100644
--- a/tests/framework/InternalParallelTests/InternalParallelPostProcessorLS/limitSurfaceTestExternalModel.py
+++ b/tests/framework/InternalParallelTests/InternalParallelPostProcessorLS/limitSurfaceTestExternalModel.py
@@ -23,9 +23,6 @@ def initialize(self,runInfoDict,inputFiles):
   self.z = 0
   return

-#def createNewInput(self,myInput,samplerType,**Kwargs):
-#  return Kwargs['SampledVars']
-
 def run(self,Input):
   #self.z = Input['x0']+Input['y0']
   self.z = self.x0 + self.y0
diff --git a/tests/framework/Models/External/AllMethods/attenuate.py b/tests/framework/Models/External/AllMethods/attenuate.py
index c04c9829a6..3f6fa8b528 100644
--- a/tests/framework/Models/External/AllMethods/attenuate.py
+++ b/tests/framework/Models/External/AllMethods/attenuate.py
@@ -39,13 +39,20 @@ def initialize(self,runInfo,inputs):
   # check readMoreXML value is available
   self.fromInit = np.sqrt(self.fromReadMoreXML)

-def createNewInput(self,inputs,samplerType,**kwargs):
+def createNewInput(self, inputs, samplerType, rlz):
+  """
+    This function will return a new input to be submitted to the model
+    @ In, inputs, list, the inputs (list) to start from to generate the new one
+    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
+    @ In, rlz, Realization, Realization from which to build the input
+    @ Out, toReturn, dict, the new input dictionary for the model
+  """
   # check sampler type
   if samplerType != 'Grid':
     raise IOError('Received wrong sampler type in external model createNewInput! Expected "Grid" but got '+samplerType)
   # set a variable through "self"
   self.fromCNISelf = self.fromReadMoreXML / 2.0
-  toReturn = dict(kwargs['SampledVars'])
+  toReturn = dict(rlz)
   toReturn['fromCNIDict'] = self.fromInit * 2.0
   toReturn['unwanted'] = 42
   return toReturn
diff --git a/tests/framework/NDGridPWcdf/lorentzAttractor.py b/tests/framework/NDGridPWcdf/lorentzAttractor.py
index 93ae231101..f871b3aec4 100644
--- a/tests/framework/NDGridPWcdf/lorentzAttractor.py
+++ b/tests/framework/NDGridPWcdf/lorentzAttractor.py
@@ -25,8 +25,15 @@ def initialize(self,runInfoDict,inputFiles):

   return

-def createNewInput(self,myInput,samplerType,**Kwargs):
-  return Kwargs['SampledVars']
+def createNewInput(self, myInput, samplerType, rlz):
+  """
+    This function will return a new input to be submitted to the model
+    @ In, myInput, list, the inputs (list) to start from to generate the new one
+    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
+    @ In, rlz, Realization, Realization from which to build the input
+    @ Out, rlz, Realization, the realization to use as the new model input
+  """
+  return rlz

 def run(self,Input):
   #max_time = 0.03
diff --git a/tests/framework/NDGridPWvalue/lorentzAttractor.py b/tests/framework/NDGridPWvalue/lorentzAttractor.py
index 93ae231101..f871b3aec4 100644
--- a/tests/framework/NDGridPWvalue/lorentzAttractor.py
+++ b/tests/framework/NDGridPWvalue/lorentzAttractor.py
@@ -25,8 +25,15 @@ def initialize(self,runInfoDict,inputFiles):

   return

-def createNewInput(self,myInput,samplerType,**Kwargs):
-  return Kwargs['SampledVars']
+def createNewInput(self, myInput, samplerType, rlz):
+  """
+    This function will return a new input to be submitted to the model
+    @ In, myInput, list, the inputs (list) to start from to generate the new one
+    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
+    @ In, rlz, Realization, Realization from which to build the input
+    @ Out, rlz, Realization, the realization to use as the new model input
+  """
+  return rlz

 def run(self,Input):
   #max_time = 0.03
diff --git a/tests/framework/ND_test_Grid_cdf/lorentzAttractor.py b/tests/framework/ND_test_Grid_cdf/lorentzAttractor.py
index 93ae231101..f871b3aec4 100644
--- a/tests/framework/ND_test_Grid_cdf/lorentzAttractor.py
+++ b/tests/framework/ND_test_Grid_cdf/lorentzAttractor.py
@@ -25,8 +25,15 @@ def initialize(self,runInfoDict,inputFiles):

   return

-def createNewInput(self,myInput,samplerType,**Kwargs):
-  return Kwargs['SampledVars']
+def createNewInput(self, myInput, samplerType, rlz):
+  """
+    This function will return a new input to be submitted to the model
+    @ In, myInput, list, the inputs (list) to start from to generate the new one
+    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
+    @ In, rlz, Realization, Realization from which to build the input
+    @ Out, rlz, Realization, the realization to use as the new model input
+  """
+  return rlz

 def run(self,Input):
   #max_time = 0.03
diff --git a/tests/framework/ND_test_Grid_value/lorentzAttractor.py b/tests/framework/ND_test_Grid_value/lorentzAttractor.py
index 93ae231101..f871b3aec4 100644
--- a/tests/framework/ND_test_Grid_value/lorentzAttractor.py
+++ b/tests/framework/ND_test_Grid_value/lorentzAttractor.py
@@ -25,8 +25,15 @@ def initialize(self,runInfoDict,inputFiles):

   return

-def createNewInput(self,myInput,samplerType,**Kwargs):
-  return Kwargs['SampledVars']
+def createNewInput(self, myInput, samplerType, rlz):
+  """
+    This function will return a new input to be submitted to the model
+    @ In, myInput, list, the inputs (list) to start from to generate the new one
+    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
+    @ In, rlz, Realization, Realization from which to build the input
+    @ Out, rlz, Realization, the realization to use as the new model input
+  """
+  return rlz

 def run(self,Input):
   #max_time = 0.03
diff --git a/tests/framework/ND_test_LHS/lorentzAttractor.py b/tests/framework/ND_test_LHS/lorentzAttractor.py
index 062289bf1c..aa49dfd686 100644
--- a/tests/framework/ND_test_LHS/lorentzAttractor.py
+++ b/tests/framework/ND_test_LHS/lorentzAttractor.py
@@ -24,9 +24,15 @@ def initialize(self,runInfoDict,inputFiles):
   self.beta = 8.0/3.0
   return

-
-def createNewInput(self,myInput,samplerType,**Kwargs):
-  return Kwargs['SampledVars']
+def createNewInput(self, myInput, samplerType, rlz):
+  """
+    This function will return a new input to be submitted to the model
+    @ In, myInput, list, the inputs (list) to start from to generate the new one
+    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
+    @ In, rlz, Realization, Realization from which to build the input
+    @ Out, rlz, Realization, the realization to use as the new model input
+  """
+  return rlz

 def run(self,Input):
   max_time = 0.03
diff --git a/tests/framework/ND_test_MC/lorentzAttractor.py b/tests/framework/ND_test_MC/lorentzAttractor.py
index 93ae231101..671b899c84 100644
--- a/tests/framework/ND_test_MC/lorentzAttractor.py
+++ b/tests/framework/ND_test_MC/lorentzAttractor.py
@@ -25,10 +25,17 @@ def initialize(self,runInfoDict,inputFiles):

   return

-def createNewInput(self,myInput,samplerType,**Kwargs):
-  return Kwargs['SampledVars']
+def createNewInput(self, myInput, samplerType, rlz):
+  """
+    This function will return a new input to be submitted to the model
+    @ In, myInput, list, the inputs (list) to start from to generate the new one
+    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
+    @ In, rlz, Realization, Realization from which to build the input
+    @ Out, rlz, Realization, the realization to use as the new model input
+  """
+  return rlz

-def run(self,Input):
+def run(self, Input):
   #max_time = 0.03

   t_step = 0.01
diff --git a/tests/framework/ND_test_MC_MVN/lorentzAttractor_noK.py b/tests/framework/ND_test_MC_MVN/lorentzAttractor_noK.py
index b78a562a66..4ce51dc325 100644
--- a/tests/framework/ND_test_MC_MVN/lorentzAttractor_noK.py
+++ b/tests/framework/ND_test_MC_MVN/lorentzAttractor_noK.py
@@ -24,10 +24,6 @@ def initialize(self,runInfoDict,inputFiles):
   self.beta = 8.0/3.0
   return

-
-def createNewInput(self,myInput,samplerType,**Kwargs):
-  return Kwargs['SampledVars']
-
 def run(self,Input):
   max_time = 0.03
   t_step = 0.01
diff --git a/tests/framework/PostProcessors/BasicStatistics/basicStatisticsGeneral/basicStatisticsExternalModelTest.py b/tests/framework/PostProcessors/BasicStatistics/basicStatisticsGeneral/basicStatisticsExternalModelTest.py
index 7faf9e6afa..36dadd76bd 100644
--- a/tests/framework/PostProcessors/BasicStatistics/basicStatisticsGeneral/basicStatisticsExternalModelTest.py
+++ b/tests/framework/PostProcessors/BasicStatistics/basicStatisticsGeneral/basicStatisticsExternalModelTest.py
@@ -32,8 +32,6 @@ def initialize(self,runInfoDict,inputFiles):
   self.cnt = 0.0
   return

-def createNewInput(self,myInput,samplerType,**Kwargs): return 
Kwargs['SampledVars'] - def run(self,Input): self.cnt = 1.0 self.x0 = 1.0 diff --git a/tests/framework/PostProcessors/BasicStatistics/basicStatisticsTimeDependent/basicStatisticsExternalModelTest.py b/tests/framework/PostProcessors/BasicStatistics/basicStatisticsTimeDependent/basicStatisticsExternalModelTest.py index c0cb91c897..a6a9a20970 100644 --- a/tests/framework/PostProcessors/BasicStatistics/basicStatisticsTimeDependent/basicStatisticsExternalModelTest.py +++ b/tests/framework/PostProcessors/BasicStatistics/basicStatisticsTimeDependent/basicStatisticsExternalModelTest.py @@ -32,8 +32,6 @@ def initialize(self,runInfoDict,inputFiles): self.cnt = 0.0 return -def createNewInput(self,myInput,samplerType,**Kwargs): return Kwargs['SampledVars'] - def run(self,Input): self.cnt = 1.0 self.x0 = 1.0 diff --git a/tests/framework/PostProcessors/BasicStatistics/basicStatisticsTimeDependentAsynchronousHistories/basicStatisticsExternalModelTest.py b/tests/framework/PostProcessors/BasicStatistics/basicStatisticsTimeDependentAsynchronousHistories/basicStatisticsExternalModelTest.py index b41158b4e7..88c94779a2 100644 --- a/tests/framework/PostProcessors/BasicStatistics/basicStatisticsTimeDependentAsynchronousHistories/basicStatisticsExternalModelTest.py +++ b/tests/framework/PostProcessors/BasicStatistics/basicStatisticsTimeDependentAsynchronousHistories/basicStatisticsExternalModelTest.py @@ -32,8 +32,6 @@ def initialize(self,runInfoDict,inputFiles): self.cnt = 0.0 return -def createNewInput(self,myInput,samplerType,**Kwargs): return Kwargs['SampledVars'] - def run(self,Input): self.cnt = 1.0 self.x0 = 1.0 diff --git a/tests/framework/PostProcessors/BasicStatistics/basicStatisticsXml/basicStatisticsExternalModelTest.py b/tests/framework/PostProcessors/BasicStatistics/basicStatisticsXml/basicStatisticsExternalModelTest.py index 044c251f55..e53c198618 100644 --- a/tests/framework/PostProcessors/BasicStatistics/basicStatisticsXml/basicStatisticsExternalModelTest.py +++ b/tests/framework/PostProcessors/BasicStatistics/basicStatisticsXml/basicStatisticsExternalModelTest.py @@ -32,8 +32,6 @@ def initialize(self,runInfoDict,inputFiles): self.cnt = 0.0 return -def createNewInput(self,myInput,samplerType,**Kwargs): return Kwargs['SampledVars'] - def run(self,Input): self.cnt = 1.0 self.x0 = 1.0 diff --git a/tests/framework/PostProcessors/EconomicRatio/economicRatioTimeDependent/economicRatioExternalModelTest.py b/tests/framework/PostProcessors/EconomicRatio/economicRatioTimeDependent/economicRatioExternalModelTest.py index 04e9361ed1..3928d532e3 100644 --- a/tests/framework/PostProcessors/EconomicRatio/economicRatioTimeDependent/economicRatioExternalModelTest.py +++ b/tests/framework/PostProcessors/EconomicRatio/economicRatioTimeDependent/economicRatioExternalModelTest.py @@ -29,8 +29,6 @@ def initialize(self,runInfoDict,inputFiles): self.cnt = 0.0 return -def createNewInput(self,myInput,samplerType,**Kwargs): return Kwargs['SampledVars'] - def run(self,Input): self.cnt = 1.0 self.x0 = 1.0 diff --git a/tests/framework/PostProcessors/LimitSurface/limitSurface/limitSurfaceTestExternalModel.py b/tests/framework/PostProcessors/LimitSurface/limitSurface/limitSurfaceTestExternalModel.py index 75c20d5713..1389851b59 100644 --- a/tests/framework/PostProcessors/LimitSurface/limitSurface/limitSurfaceTestExternalModel.py +++ b/tests/framework/PostProcessors/LimitSurface/limitSurface/limitSurfaceTestExternalModel.py @@ -23,9 +23,6 @@ def initialize(self,runInfoDict,inputFiles): self.z = 0 return -#def 
createNewInput(self,myInput,samplerType,**Kwargs):
-#  return Kwargs['SampledVars']
-
 def run(self,Input):
   #self.z = Input['x0']+Input['y0']
   self.z = self.x0 + self.y0
diff --git a/tests/framework/PostProcessors/LimitSurface/limitSurface_integral/limitSurfaceTestExternalModel.py b/tests/framework/PostProcessors/LimitSurface/limitSurface_integral/limitSurfaceTestExternalModel.py
index 75c20d5713..1389851b59 100644
--- a/tests/framework/PostProcessors/LimitSurface/limitSurface_integral/limitSurfaceTestExternalModel.py
+++ b/tests/framework/PostProcessors/LimitSurface/limitSurface_integral/limitSurfaceTestExternalModel.py
@@ -23,9 +23,6 @@ def initialize(self,runInfoDict,inputFiles):
   self.z = 0
   return

-#def createNewInput(self,myInput,samplerType,**Kwargs):
-#  return Kwargs['SampledVars']
-
 def run(self,Input):
   #self.z = Input['x0']+Input['y0']
   self.z = self.x0 + self.y0
diff --git a/tests/framework/PostProcessors/RavenOutputPostProcessor/dynamic/time_model.py b/tests/framework/PostProcessors/RavenOutputPostProcessor/dynamic/time_model.py
index 7faf9e6afa..e40f83c395 100644
--- a/tests/framework/PostProcessors/RavenOutputPostProcessor/dynamic/time_model.py
+++ b/tests/framework/PostProcessors/RavenOutputPostProcessor/dynamic/time_model.py
@@ -32,7 +32,15 @@ def initialize(self,runInfoDict,inputFiles):
   self.cnt = 0.0
   return

-def createNewInput(self,myInput,samplerType,**Kwargs): return Kwargs['SampledVars']
+def createNewInput(self, myInput, samplerType, rlz):
+  """
+    This function will return a new input to be submitted to the model
+    @ In, myInput, list, the inputs (list) to start from to generate the new one
+    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
+    @ In, rlz, Realization, Realization from which to build the input
+    @ Out, rlz, Realization, the realization to use as the new model input
+  """
+  return rlz

 def run(self,Input):
   self.cnt = 1.0
diff --git a/tests/framework/PostProcessors/pca_postprocessor/transform/lorentzAttractor_noK.py b/tests/framework/PostProcessors/pca_postprocessor/transform/lorentzAttractor_noK.py
index b78a562a66..7a96b3f653 100644
--- a/tests/framework/PostProcessors/pca_postprocessor/transform/lorentzAttractor_noK.py
+++ b/tests/framework/PostProcessors/pca_postprocessor/transform/lorentzAttractor_noK.py
@@ -24,9 +24,15 @@ def initialize(self,runInfoDict,inputFiles):
   self.beta = 8.0/3.0
   return

-
-def createNewInput(self,myInput,samplerType,**Kwargs):
-  return Kwargs['SampledVars']
+def createNewInput(self, myInput, samplerType, rlz):
+  """
+    This function will return a new input to be submitted to the model
+    @ In, myInput, list, the inputs (list) to start from to generate the new one
+    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
+    @ In, rlz, Realization, Realization from which to build the input
+    @ Out, rlz, Realization, the realization to use as the new model input
+  """
+  return rlz

 def run(self,Input):
   max_time = 0.03
diff --git a/tests/framework/dists_vars/dist_var.py b/tests/framework/dists_vars/dist_var.py
index c1d199a91f..e075641a91 100644
--- a/tests/framework/dists_vars/dist_var.py
+++ b/tests/framework/dists_vars/dist_var.py
@@ -11,8 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] def run(self,Input): diff --git a/tests/framework/pcaSampler/gridPCAIndex/lorentzAttractor_noK.py b/tests/framework/pcaSampler/gridPCAIndex/lorentzAttractor_noK.py index b78a562a66..4ce51dc325 100644 --- a/tests/framework/pcaSampler/gridPCAIndex/lorentzAttractor_noK.py +++ b/tests/framework/pcaSampler/gridPCAIndex/lorentzAttractor_noK.py @@ -24,10 +24,6 @@ def initialize(self,runInfoDict,inputFiles): self.beta = 8.0/3.0 return - -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pcaSampler/multiPCA/lorentzAttractor_noK.py b/tests/framework/pcaSampler/multiPCA/lorentzAttractor_noK.py index 4604e785f0..9fcea71339 100644 --- a/tests/framework/pcaSampler/multiPCA/lorentzAttractor_noK.py +++ b/tests/framework/pcaSampler/multiPCA/lorentzAttractor_noK.py @@ -24,10 +24,6 @@ def initialize(self,runInfoDict,inputFiles): self.beta = 8.0/3.0 return - -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pcaSampler/pcaIndex/lorentzAttractor_noK.py b/tests/framework/pcaSampler/pcaIndex/lorentzAttractor_noK.py index b78a562a66..4ce51dc325 100644 --- a/tests/framework/pcaSampler/pcaIndex/lorentzAttractor_noK.py +++ b/tests/framework/pcaSampler/pcaIndex/lorentzAttractor_noK.py @@ -24,10 +24,6 @@ def initialize(self,runInfoDict,inputFiles): self.beta = 8.0/3.0 return - -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_LHS/LHS/lorentzAttractor_noK.py b/tests/framework/pca_LHS/LHS/lorentzAttractor_noK.py index b78a562a66..4ce51dc325 100644 --- a/tests/framework/pca_LHS/LHS/lorentzAttractor_noK.py +++ b/tests/framework/pca_LHS/LHS/lorentzAttractor_noK.py @@ -24,10 +24,6 @@ def initialize(self,runInfoDict,inputFiles): self.beta = 8.0/3.0 return - -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_LHS/LHSCdf/lorentzAttractor_noK.py b/tests/framework/pca_LHS/LHSCdf/lorentzAttractor_noK.py index b78a562a66..989274300b 100644 --- a/tests/framework/pca_LHS/LHSCdf/lorentzAttractor_noK.py +++ b/tests/framework/pca_LHS/LHSCdf/lorentzAttractor_noK.py @@ -25,9 +25,6 @@ def initialize(self,runInfoDict,inputFiles): return -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_LHS/LHSValue/lorentzAttractor_noK.py b/tests/framework/pca_LHS/LHSValue/lorentzAttractor_noK.py index b78a562a66..989274300b 100644 --- a/tests/framework/pca_LHS/LHSValue/lorentzAttractor_noK.py +++ b/tests/framework/pca_LHS/LHSValue/lorentzAttractor_noK.py @@ -25,9 +25,6 @@ def initialize(self,runInfoDict,inputFiles): return -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_LHS/LHScorrelated/lorentzAttractor_noK.py b/tests/framework/pca_LHS/LHScorrelated/lorentzAttractor_noK.py index b78a562a66..989274300b 100644 --- a/tests/framework/pca_LHS/LHScorrelated/lorentzAttractor_noK.py +++ b/tests/framework/pca_LHS/LHScorrelated/lorentzAttractor_noK.py @@ -25,9 
+25,6 @@ def initialize(self,runInfoDict,inputFiles): return -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_LHS/LHSuncorrelated/lorentzAttractor_noK.py b/tests/framework/pca_LHS/LHSuncorrelated/lorentzAttractor_noK.py index b78a562a66..989274300b 100644 --- a/tests/framework/pca_LHS/LHSuncorrelated/lorentzAttractor_noK.py +++ b/tests/framework/pca_LHS/LHSuncorrelated/lorentzAttractor_noK.py @@ -25,9 +25,6 @@ def initialize(self,runInfoDict,inputFiles): return -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_LHS/stratifiedCdf/lorentzAttractor_noK.py b/tests/framework/pca_LHS/stratifiedCdf/lorentzAttractor_noK.py index b78a562a66..989274300b 100644 --- a/tests/framework/pca_LHS/stratifiedCdf/lorentzAttractor_noK.py +++ b/tests/framework/pca_LHS/stratifiedCdf/lorentzAttractor_noK.py @@ -25,9 +25,6 @@ def initialize(self,runInfoDict,inputFiles): return -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_LHS/stratifiedValue/lorentzAttractor_noK.py b/tests/framework/pca_LHS/stratifiedValue/lorentzAttractor_noK.py index b78a562a66..989274300b 100644 --- a/tests/framework/pca_LHS/stratifiedValue/lorentzAttractor_noK.py +++ b/tests/framework/pca_LHS/stratifiedValue/lorentzAttractor_noK.py @@ -25,9 +25,6 @@ def initialize(self,runInfoDict,inputFiles): return -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_grid/gridPCACdf/lorentzAttractor_noK.py b/tests/framework/pca_grid/gridPCACdf/lorentzAttractor_noK.py index b78a562a66..4ce51dc325 100644 --- a/tests/framework/pca_grid/gridPCACdf/lorentzAttractor_noK.py +++ b/tests/framework/pca_grid/gridPCACdf/lorentzAttractor_noK.py @@ -24,10 +24,6 @@ def initialize(self,runInfoDict,inputFiles): self.beta = 8.0/3.0 return - -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_grid/gridPCAValue/lorentzAttractor_noK.py b/tests/framework/pca_grid/gridPCAValue/lorentzAttractor_noK.py index b78a562a66..4ce51dc325 100644 --- a/tests/framework/pca_grid/gridPCAValue/lorentzAttractor_noK.py +++ b/tests/framework/pca_grid/gridPCAValue/lorentzAttractor_noK.py @@ -24,10 +24,6 @@ def initialize(self,runInfoDict,inputFiles): self.beta = 8.0/3.0 return - -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_grid/mcPCA/lorentzAttractor_noK.py b/tests/framework/pca_grid/mcPCA/lorentzAttractor_noK.py index b78a562a66..4ce51dc325 100644 --- a/tests/framework/pca_grid/mcPCA/lorentzAttractor_noK.py +++ b/tests/framework/pca_grid/mcPCA/lorentzAttractor_noK.py @@ -24,10 +24,6 @@ def initialize(self,runInfoDict,inputFiles): self.beta = 8.0/3.0 return - -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_rom/MVNAbs/lorentzAttractor_noK.py b/tests/framework/pca_rom/MVNAbs/lorentzAttractor_noK.py index b78a562a66..989274300b 100644 --- 
a/tests/framework/pca_rom/MVNAbs/lorentzAttractor_noK.py +++ b/tests/framework/pca_rom/MVNAbs/lorentzAttractor_noK.py @@ -25,9 +25,6 @@ def initialize(self,runInfoDict,inputFiles): return -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_rom/MVNRel/lorentzAttractor_noK.py b/tests/framework/pca_rom/MVNRel/lorentzAttractor_noK.py index b78a562a66..989274300b 100644 --- a/tests/framework/pca_rom/MVNRel/lorentzAttractor_noK.py +++ b/tests/framework/pca_rom/MVNRel/lorentzAttractor_noK.py @@ -25,9 +25,6 @@ def initialize(self,runInfoDict,inputFiles): return -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_rom/PCAReduction/lorentzAttractor_noK.py b/tests/framework/pca_rom/PCAReduction/lorentzAttractor_noK.py index b78a562a66..989274300b 100644 --- a/tests/framework/pca_rom/PCAReduction/lorentzAttractor_noK.py +++ b/tests/framework/pca_rom/PCAReduction/lorentzAttractor_noK.py @@ -25,9 +25,6 @@ def initialize(self,runInfoDict,inputFiles): return -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01 diff --git a/tests/framework/pca_rom/oneDimPCA/lorentzAttractor_noK.py b/tests/framework/pca_rom/oneDimPCA/lorentzAttractor_noK.py index b78a562a66..989274300b 100644 --- a/tests/framework/pca_rom/oneDimPCA/lorentzAttractor_noK.py +++ b/tests/framework/pca_rom/oneDimPCA/lorentzAttractor_noK.py @@ -25,9 +25,6 @@ def initialize(self,runInfoDict,inputFiles): return -def createNewInput(self,myInput,samplerType,**Kwargs): - return Kwargs['SampledVars'] - def run(self,Input): max_time = 0.03 t_step = 0.01
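Note on the counter and limit conversion carried through the sampler and optimizer patches above: the scalar attributes self.counter and self.limit are replaced by the dictionaries self.counters and self.limits, keyed by the kind of quantity being tracked (only the 'samples' key appears in these hunks). The toy class below is not RAVEN code; it is a minimal, self-contained sketch of the bookkeeping pattern the converted samplers follow, in which the caller advances the counter before requesting an input and the current point is indexed with counter - 1.

class ToySampler:
  """Toy stand-in (not RAVEN code) illustrating the counters/limits bookkeeping."""
  def __init__(self, points):
    self.pointsToRun = points
    self.counters = {'samples': 0}            # replaces the old scalar self.counter
    self.limits = {'samples': len(points)}    # replaces the old scalar self.limit

  def amIreadyToProvideAnInput(self):
    # ready as long as the sample counter has not reached the sample limit
    return self.counters['samples'] < self.limits['samples']

  def localGenerateInput(self):
    # the counter is advanced by the caller before generation, so index with counter - 1
    return self.pointsToRun[self.counters['samples'] - 1]

if __name__ == '__main__':
  sampler = ToySampler([0.1, 0.2, 0.3])
  while sampler.amIreadyToProvideAnInput():
    sampler.counters['samples'] += 1
    print(sampler.localGenerateInput())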
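Note on the createNewInput conversion in the last patch: external-model test files now receive the realization (rlz) directly instead of **Kwargs, and they either drop the hook entirely or pass the realization through unchanged. The sketch below mirrors the structure of the limit-surface test models in the hunks above; the driver at the bottom is only an assumption added so the sketch can be executed on its own, and it is not how RAVEN actually invokes an external model.

def initialize(self, runInfoDict, inputFiles):
  """
    Set up model state before any realizations are run.
  """
  self.z = 0

def createNewInput(self, myInput, samplerType, rlz):
  """
    Build the input for one run from the sampled realization.
    @ In, myInput, list, the original inputs for the model
    @ In, samplerType, string, type of sampler requesting the input
    @ In, rlz, Realization, realization carrying the sampled variables
    @ Out, rlz, Realization, the realization to use as the new model input
  """
  # Under the new interface the sampled values arrive on the realization itself,
  # so simple external models can pass it through unchanged (or omit this hook).
  return rlz

def run(self, Input):
  """
    Evaluate the model; sampled variables are available as attributes on self.
  """
  self.z = self.x0 + self.y0

if __name__ == '__main__':
  # Hypothetical driver, not the real RAVEN calling sequence, just to exercise the hooks.
  import types
  model = types.SimpleNamespace()
  initialize(model, {}, [])
  sampled = {'x0': 1.0, 'y0': 2.0}                      # stand-in for a Realization's sampled variables
  newInput = createNewInput(model, [], 'MonteCarlo', sampled)
  model.x0, model.y0 = newInput['x0'], newInput['y0']   # the framework normally sets these attributes
  run(model, newInput)
  print(model.z)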