Merge pull request #2309 from GabrielSoto-INL/autoARMA-rework

AutoARMA Algorithm for SyntheticHistory ROM. XSD schemas are not updated. An issue was created
idaholab · May 13, 2024 · 579eb05 · 579eb05
2 parents 6302515 + 6d2458f
commit 579eb05
Show file tree

Hide file tree

Showing 54 changed files with 3,090 additions and 212 deletions.
diff --git a/dependencies.xml b/dependencies.xml
@@ -73,6 +73,7 @@ Note all install methods after "main" take
     <smt machine='x86_64'/> <!-- not available on macos arm64 -->
     <line_profiler optional='True'/>
     <!-- <ete3 optional='True'/> -->
+    <statsforecast/>
     <pywavelets optional='True'>1.2</pywavelets>
     <python-sensors source="pip"/>
     <numdifftools source="pip">0.9</numdifftools>

diff --git a/doc/user_manual/generated/generateRomDoc.py b/doc/user_manual/generated/generateRomDoc.py
@@ -221,8 +221,8 @@
         <periods>12, 24</periods>
       </fourier>
       <arma target="signal1, signal2" seed='42'>
-        <SignalLag>2</SignalLag>
-        <NoiseLag>3</NoiseLag>
+        <P>2</P>
+        <Q>3</Q>
       </arma>
     </ROM>
     ...

diff --git a/ravenframework/Models/ROM.py b/ravenframework/Models/ROM.py
@@ -237,8 +237,11 @@ def _readMoreXML(self,xmlNode):
       segment = xmlNode.find('Segment')
       romXml = copy.deepcopy(xmlNode)
       romXml.remove(segment)
+      # depending on segType, this ROM *will* have clusters and we will need this fact later
+      self._interfaceROM.overrideHasClusters(segType in ['cluster', 'interpolate'])
     else:
       romXml = xmlNode
+      self._interfaceROM.overrideHasClusters(False) # just making sure it's False otherwise
     self._interfaceROM._readMoreXML(romXml)
 
     if self.segment:

diff --git a/ravenframework/SupervisedLearning/ROMCollection.py b/ravenframework/SupervisedLearning/ROMCollection.py
@@ -636,7 +636,7 @@ def _writeSegmentsRealization(self, writeTo):
     """
       Writes pointwise data about segmentation to a realization.
       @ In, writeTo, DataObject, data structure into which data should be written
-      @ Out, None
+      @ Out, rlz, dict, realization data structure where each entry is an np.ndarray
     """
 
     # realization to add eventually
@@ -956,12 +956,23 @@ def writePointwiseData(self, writeTo):
     featureNames = sorted(list(self._clusterInfo['features']['unscaled'].keys()))
     for scaling in ['unscaled', 'scaled']:
       for name in featureNames:
-        varName = 'ClusterFeature|{}|{}'.format(name, scaling)
+        varName = f'ClusterFeature|{name}|{scaling}'
         writeTo.addVariable(varName, np.array([]), classify='meta', indices=['segment_number'])
         rlz[varName] = np.asarray(self._clusterInfo['features'][scaling][name])
     varName = 'ClusterLabels'
     writeTo.addVariable(varName, np.array([]), classify='meta', indices=['segment_number'])
     rlz[varName] = np.asarray(labels)
+    # below, we loop through all segment ROMs to find feature data to write to data object
+    segments = self.getSegmentRoms(full=True)
+    for i,rom in enumerate(segments):
+      romRlz = rom.getSegmentPointwiseData()
+      for feature, featureVal in romRlz.items():
+        varName = f'Feature|{feature}'
+        if i==0:
+          writeTo.addVariable(varName, np.array([]), classify='meta', indices=['segment_number'])
+          rlz[varName] = featureVal
+        else:
+          rlz[varName] = np.r_[rlz[varName],featureVal]
 
     writeTo.addRealization(rlz)
 
@@ -981,7 +992,7 @@ def writeXML(self, writeTo, targets=None, skip=None):
     labels = self._clusterInfo['labels']
     for i, repRom in enumerate(self._roms):
       # find associated node
-      modify = xmlUtils.findPath(main, 'SegmentROM[@segment={}]'.format(i))
+      modify = xmlUtils.findPath(main, f'SegmentROM[@segment={i}]')
       # make changes to reflect being a cluster
       modify.tag = 'ClusterROM'
       modify.attrib['cluster'] = modify.attrib.pop('segment')

diff --git a/ravenframework/SupervisedLearning/SupervisedLearning.py b/ravenframework/SupervisedLearning/SupervisedLearning.py
@@ -204,6 +204,8 @@ def __init__(self):
     # After the computation, the importances are set as attribute of the self.model
     # variable and called 'feature_importances_' and accessable as self.model.feature_importances_
     self.computeImportances = False
+    # distinguishes the capability reported by `isClusterable()` from whether this ROM does, in fact, have clusters
+    self._hasClusters = False # can only be true if `isClusterable`==True
 
   def __getstate__(self):
     """
@@ -658,6 +660,17 @@ def writePointwiseData(self, *args):
     # by default, nothing to write!
     self.raiseAMessage('Writing ROM "{}", but no pointwise data found. Moving on ...')
 
+  def getSegmentPointwiseData(self):
+    """
+      Allows the SVE to accumulate data arrays to later add to a DataObject
+      Overload in subclasses.
+      @ In, None
+      @ Out, segmentData, dict, pointwise data for this segment (empty dict by default)
+    """
+    # by default, nothing to write!
+    self.raiseAMessage('Writing ROM, but no pointwise data found. Moving on ...')
+    return {}
+
   def writeXML(self, writeTo, targets=None, skip=None):
     """
       Allows the SVE to put whatever it wants into an XML to print to file.
@@ -701,13 +714,36 @@ def setAdditionalParams(self, params):
   ### ROM Clustering (see ROMCollection.py) ###
   def isClusterable(self):
     """
-      Allows ROM to declare whether it has methods for clustring. Default is no.
+      Allows ROM to declare whether it has methods for clustering. Default is no.
       @ In, None
       @ Out, isClusterable, bool, if True then has clustering mechanics.
     """
     # only true if overridden.
     return False
 
+  def overrideHasClusters(self, willHaveClusters: bool):
+    """
+      Sets protected class member which tells ROM whether there will be clustering
+      @ In, willHaveClusters, bool, will the ROM have clustering in this run?
+      @ Out, None
+    """
+    assert isinstance(willHaveClusters, bool)
+    if not self.isClusterable():
+      # if ROM can't cluster in the first place... default to False
+      if willHaveClusters:
+        self.raiseAWarning("Clustering not allowed in this ROM, defaulting `hasClusters` to False")
+      self._hasClusters = False
+    else:
+      self._hasClusters = willHaveClusters
+
+  def hasClusters(self):
+    """
+      Allows ROM to declare if it *has* clusters, not just if it is capable. Default is no.
+      @ In, None
+      @ Out, hasClusters, bool, if True then contains clusters
+    """
+    return self._hasClusters
+
   def checkRequestedClusterFeatures(self, request):
     """
       Takes the user-requested features (sometimes "all") and interprets them for this ROM.

diff --git a/ravenframework/SupervisedLearning/SyntheticHistory.py b/ravenframework/SupervisedLearning/SyntheticHistory.py
@@ -78,7 +78,7 @@ def _handleInput(self, paramInput):
       @ Out, None
     """
     SupervisedLearning._handleInput(self, paramInput)
-    self.readTSAInput(paramInput)
+    self.readTSAInput(paramInput, self.hasClusters())
     if len(self._tsaAlgorithms)==0:
       self.raiseAWarning("No Segmenting algorithms were requested.")
 
@@ -157,6 +157,21 @@ def writePointwiseData(self, writeTo):
     """
     pass # TODO
 
+  def getSegmentPointwiseData(self):
+    """
+      Allows the SVE to accumulate data arrays to later add to a DataObject
+      Overload in subclasses.
+      @ In, None
+      @ Out, segmentData, dict, data from getTSApointwiseData flattened to keys of the form '{target}|{algo}|{k}'
+    """
+    segmentNonFeatures = self.getTSApointwiseData()
+    formattedNonFeatures = {}
+    for algo,algoInfo in segmentNonFeatures.items():
+      for target,targetInfo in algoInfo.items():
+        for k,val in targetInfo.items():
+          formattedNonFeatures[f'{target}|{algo}|{k}'] = val
+    return formattedNonFeatures
+
   def writeXML(self, writeTo, targets=None, skip=None):
     """
       Allows the SVE to put whatever it wants into an XML to print to file.
@@ -212,16 +227,18 @@ def checkRequestedClusterFeatures(self, request):
                         '\n  '.join(errMsg))
     return request
 
-  def _getClusterableFeatures(self):
+  def _getClusterableFeatures(self, trainGlobal=False):
     """
       Provides a list of clusterable features.
       For this ROM, these are as "TSA_algorith|feature" such as "fourier|amplitude"
       @ In, None
+      @ In, trainGlobal, bool, optional, if True use self._tsaGlobalAlgorithms instead of self._tsaAlgorithms (default False)
       @ Out, features, dict(list(str)), clusterable features by algorithm
     """
     features = {}
     # check: is it possible tsaAlgorithms isn't populated by now?
-    for algo in self._tsaAlgorithms:
+    algorithms = self._tsaGlobalAlgorithms if trainGlobal else self._tsaAlgorithms
+    for algo in algorithms:
       if algo.canCharacterize():
         features[algo.name] = algo._features
       else:
@@ -320,8 +337,17 @@ def parametrizeGlobalRomFeatures(self, featureDict):
       @ In, featureDict, dict, dictionary of features to parametrize
       @ Out, params, dict, dictionary of collected parametrized features
     """
-    # NOTE: only used during interpolation for global features! returning empty dict...
+    # NOTE: this should match the clustered features template.
+    featureTemplate = '{target}|{metric}|{id}' # TODO this kind of has to be the format currently
     params = {}
+    requests = self._getClusterableFeatures(trainGlobal=True)
+
+    for algo in self._tsaGlobalAlgorithms:
+      if algo.name not in requests or not algo.canCharacterize():
+        continue
+      algoReq = requests[algo.name] if requests is not None else None
+      algoFeatures = algo.getClusteringValues(featureTemplate, algoReq, self._tsaTrainedParams[algo])
+      params.update(algoFeatures)
     return params
 
   def setGlobalRomFeatures(self, params, pivotValues):
@@ -332,9 +358,30 @@ def setGlobalRomFeatures(self, params, pivotValues):
       @ In, pivotValues, np.array, values of time parameter
       @ Out, results, dict, global ROM feature set
     """
-    # NOTE: only used during interpolation for global features! returning empty dict...
-    results = {}
-    return results
+    byAlgo = collections.defaultdict(list)
+    for feature, values in params.items():
+      target, algoName, ident = feature.split('|', maxsplit=2)
+      byAlgo[algoName].append((target, ident, values))
+    for algo in self._tsaAlgorithms:
+      settings = byAlgo.get(algo.name, None)
+      if settings:
+        # there might be multiple instances of same algo w/ different targets, need to filter by targets
+        # filtered_settings = [feat for feat in settings if feat[0] in self._tsaTrainedParams[algo]]
+        params = algo.setClusteringValues(settings, self._tsaTrainedParams[algo])
+        self._tsaTrainedParams[algo] = params
+    return self._tsaTrainedParams
+
+  def finalizeLocalRomSegmentEvaluation(self,  settings, evaluation, globalPicker, localPicker=None):
+    """
+      Allows global settings in "settings" to affect a LOCAL evaluation of a LOCAL ROM
+      Note this is called on the LOCAL subsegment ROM and not the GLOBAL templateROM.
+      @ In, settings, dict, as from getGlobalRomSegmentSettings
+      @ In, evaluation, dict, preliminary evaluation from the local segment ROM as {target: [values]}
+      @ In, globalPicker, slice, indexer for data range of this segment FROM GLOBAL SIGNAL
+      @ In, localPicker, slice, optional, indexer for part of signal that should be adjusted IN LOCAL SIGNAL
+      @ Out, evaluation, dict, {target: np.ndarray} the input evaluation, returned unchanged by this ROM
+    """
+    return evaluation
 
   ### ESSENTIALLY UNUSED ###
   def _localNormalizeData(self,values,names,feat):