Skip to content

Commit

Permalink
Implements data transformations in TSA module (#2121)
Browse files Browse the repository at this point in the history
* Adds scikit-learn data transformations to TSA module

* adds the test XML file

* adds masking transforms, facilitates common FunctionTransformers, and adds the ability to use custom transformers from a separate python file

* refactored to use statsmodels OLS for better handling of missing values

* reworks handling data with missing values

* removes excess whitespace

* updates documentation to include TSA.Transformer class

* regolded due to change in regression solver used in TSA.Fourier

* adds docstrings

* reworks TimeSeriesAnalyzer inheritance structure, adds transformer base classes, makes transformer XML tags explicit

* regold to reflect change in transformers XML

* WIP unit testing for TSA transformers

* simplifies inheritance requirements for ARMA and RWD

* minor fixes and additions

* minor fix in generation step

* reverts RWD to being a characterizer only

* TSA README

* makes scikit-learn wrappers abstract classes and makes it so abstract classes are not included in User Manual

* updates documentation for TSA module

* makes docstrings compliant with RAVEN standard

* reduces the number of time steps for ARMA and LogARMA tests and regolds

* makes TSA.ARMA a transformer

* more docstring fixes

* removes algorithms Venn diagram

* adds updated algorithm Venn diagram

* replaces the algorithms Venn diagram with a table in the README file

* missing comma in array index

* fix for masking in ARMA

* regold due to new solver used in Fourier fit

* changes gold file for Basic test for mac
  • Loading branch information
j-bryan authored Jul 10, 2023
1 parent 9f4cdc5 commit 137d53f
Show file tree
Hide file tree
Showing 49 changed files with 5,621 additions and 5,314 deletions.
300 changes: 205 additions & 95 deletions doc/user_manual/generated/internalRom.tex

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions ravenframework/Models/PostProcessors/TSACharacterizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def _handleInput(self, inp):
"""
super()._handleInput(inp)
self.readTSAInput(inp)
if not self.canCharacterize():
self.raiseAnError(IOError, 'TSACharacterizer requires at least one TSA algorithm that can characterize!')

def initialize(self, runInfo, inputs, initDict=None):
"""
Expand Down
79 changes: 49 additions & 30 deletions ravenframework/TSA/ARMA.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
statsmodels = importerUtils.importModuleLazy('statsmodels', globals())

from .. import Distributions
from .TimeSeriesAnalyzer import TimeSeriesGenerator, TimeSeriesCharacterizer
from .TimeSeriesAnalyzer import TimeSeriesGenerator, TimeSeriesCharacterizer, TimeSeriesTransformer


# utility methods
class ARMA(TimeSeriesGenerator, TimeSeriesCharacterizer):
class ARMA(TimeSeriesGenerator, TimeSeriesCharacterizer, TimeSeriesTransformer):
r"""
AutoRegressive Moving Average time series analyzer algorithm
"""
Expand All @@ -39,12 +39,15 @@ class ARMA(TimeSeriesGenerator, TimeSeriesCharacterizer):
'ma',
'sigma2',
'const']
_acceptsMissingValues = True
_isStochastic = True

@classmethod
def getInputSpecification(cls):
"""
Method to get a reference to a class that specifies the input data for
class cls.
@ In, None
@ Out, inputSpecification, InputData.ParameterInput, class to use for
specifying input of cls.
"""
Expand Down Expand Up @@ -94,7 +97,7 @@ def __init__(self, *args, **kwargs):
def handleInput(self, spec):
"""
Reads user inputs into this object.
@ In, inp, InputData.InputParams, input specifications
@ In, spec, InputData.InputParams, input specifications
@ Out, settings, dict, initialization settings for this algorithm
"""
settings = super().handleInput(spec)
Expand All @@ -119,7 +122,7 @@ def setDefaults(self, settings):
settings['reduce_memory'] = False
return settings

def characterize(self, signal, pivot, targets, settings):
def fit(self, signal, pivot, targets, settings):
"""
Determines the charactistics of the signal based on this algorithm.
@ In, signal, np.ndarray, time series with dims [time, target]
Expand All @@ -143,13 +146,15 @@ def characterize(self, signal, pivot, targets, settings):
for tg, target in enumerate(targets):
params[target] = {}
history = signal[:, tg]
mask = ~np.isnan(history)
if settings.get('gaussianize', True):
# Transform data to obatain normal distrbuted series. See
# J.M.Morales, R.Minguez, A.J.Conejo "A methodology to generate statistically dependent wind speed scenarios,"
# Applied Energy, 87(2010) 843-855
# -> then train independent ARMAs
params[target]['cdf'] = mathUtils.characterizeCDF(history, binOps=2, minBins=self._minBins)
normed = mathUtils.gaussianize(history, params[target]['cdf'])
params[target]['cdf'] = mathUtils.characterizeCDF(history[mask], binOps=2, minBins=self._minBins)
normed = history
normed[mask] = mathUtils.gaussianize(history[mask], params[target]['cdf'])
else:
normed = history
# TODO correlation (VARMA) as well as singular -> maybe should be independent TSA algo?
Expand Down Expand Up @@ -190,6 +195,44 @@ def characterize(self, signal, pivot, targets, settings):
params[target]['arma']['results'] = res
return params

def getResidual(self, initial, params, pivot, settings):
"""
Removes trained signal from data and find residual
@ In, initial, np.array, original signal shaped [pivotValues, targets], targets MUST be in
same order as self.target
@ In, params, dict, training parameters as from self.characterize
@ In, pivot, np.array, time-like array values
@ In, settings, dict, additional settings specific to algorithm
@ Out, residual, np.array, reduced signal shaped [pivotValues, targets]
"""
# The residual for an ARMA model can be useful, and we want to return that if it's available.
# If the 'reduce_memory' option was used, then the ARIMAResults object from fitting the model
# where that residual is stored is not available. In that case, we simply return the original.
if settings['reduce_memory']:
return initial

residual = initial.copy()
for t, (target, data) in enumerate(params.items()):
residual[:, t] = data['arma']['results'].resid

return residual

def getComposite(self, initial, params, pivot, settings):
"""
Combines two component signals to form a composite signal. This is essentially the inverse
operation of the getResidual method.
@ In, initial, np.array, original signal shaped [pivotValues, targets], targets MUST be in
same order as self.target
@ In, params, dict, training parameters as from self.characterize
@ In, pivot, np.array, time-like array values
@ In, settings, dict, additional settings specific to algorithm
@ Out, composite, np.array, resulting composite signal
"""
# Add a generated ARMA signal to the initial signal.
synthetic = self.generate(params, pivot, settings)
composite = initial + synthetic
return composite

def getParamNames(self, settings):
"""
Return list of expected variable names based on the parameters
Expand Down Expand Up @@ -224,30 +267,6 @@ def getParamsAsVars(self, params):
rlz[f'{base}__MA__{q}'] = ma
return rlz

def getResidual(self, initial, params, pivot, settings):
"""
@ In, initial, np.array, original signal shaped [pivotValues, targets], targets MUST be in
same order as self.target
@ In, params, dict, training parameters as from self.characterize
@ In, pivot, np.array, time-like array values
@ In, settings, dict, additional settings specific to algorithm
@ Out, residual, np.array, reduced signal shaped [pivotValues, targets]
"""
raise NotImplementedError('ARMA cannot provide a residual yet; it must be the last TSA used!')
# FIXME how to get a useful residual?
# -> the "residual" of the ARMA is ideally white noise, not a 0 vector, even if perfectly fit
# so what does it mean to provide the residual from the ARMA training?
# in order to use "predict" (in-sample forecasting) can't be in low-memory mode
# if settings['reduce_memory']:
# raise RuntimeError('Cannot get residual of ARMA if in reduced memory mode!')
# for tg, (target, data) in enumerate(params.items()):
# armaData = data['arma']
# modelParams = np.hstack([[armaData.get('const', 0)],
# armaData['ar'],
# armaData['ma'],
# [armaData.get('var', 1)]])
# new = armaData['model'].predict(modelParams)

def generate(self, params, pivot, settings):
"""
Generates a synthetic history from fitted parameters.
Expand Down
15 changes: 14 additions & 1 deletion ravenframework/TSA/Factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,24 @@
from .Wavelet import Wavelet
from .PolynomialRegression import PolynomialRegression
from .RWD import RWD
from .Transformers import ZeroFilter, LogTransformer, ArcsinhTransformer, TanhTransformer, SigmoidTransformer, \
OutTruncation, MaxAbsScaler, MinMaxScaler, StandardScaler, RobustScaler, QuantileTransformer

factory = EntityFactory('TimeSeriesAnalyzer')
# TODO map lower case to upper case, because of silly ROM namespace problems
aliases = {'Fourier': 'fourier',
'ARMA': 'arma',
'RWD': 'rwd',
'Wavelet': 'wavelet'}
'Wavelet': 'wavelet',
'ZeroFilter': 'zerofilter',
'OutTruncation': 'outtruncation',
'LogTransformer': 'logtransformer',
'ArcsinhTransformer': 'arcsinhtransformer',
'TanhTransformer': 'tanhtransformer',
'SigmoidTransformer': 'sigmoidtransformer',
'MaxAbsScaler': 'maxabsscaler',
'MinMaxScaler': 'minmaxscaler',
'StandardScaler': 'standardscaler',
'RobustScaler': 'robustscaler',
'QuantileTransformer': 'quantiletransformer'}
factory.registerAllSubtypes(TimeSeriesAnalyzer, alias=aliases)
97 changes: 67 additions & 30 deletions ravenframework/TSA/Fourier.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@
import collections
import numpy as np
import sklearn.linear_model
import statsmodels.api as sm

from ..utils import InputData, InputTypes, randomUtils, xmlUtils, mathUtils, utils
from .TimeSeriesAnalyzer import TimeSeriesGenerator, TimeSeriesCharacterizer
from .TimeSeriesAnalyzer import TimeSeriesTransformer, TimeSeriesCharacterizer, TimeSeriesGenerator


# utility methods
class Fourier(TimeSeriesGenerator, TimeSeriesCharacterizer):
class Fourier(TimeSeriesTransformer, TimeSeriesCharacterizer, TimeSeriesGenerator):
"""
Perform Fourier analysis; note this is not Fast Fourier, where all Fourier modes are used to fit a
signal. Instead, detect the presence of specifically-requested Fourier bases.
Expand All @@ -35,12 +36,14 @@ class Fourier(TimeSeriesGenerator, TimeSeriesCharacterizer):
_features = ['sin', # amplitude of sine coefficients
'cos', # amplitude of cosine coefficients
'intercept'] # mean of signal
_acceptsMissingValues = True

@classmethod
def getInputSpecification(cls):
"""
Method to get a reference to a class that specifies the input data for
class cls.
@ In, None
@ Out, inputSpecification, InputData.ParameterInput, class to use for
specifying input of cls.
"""
Expand Down Expand Up @@ -73,14 +76,14 @@ def __init__(self, *args, **kwargs):
def handleInput(self, spec):
"""
Reads user inputs into this object.
@ In, inp, InputData.InputParams, input specifications
@ In, spec, InputData.InputParams, input specifications
@ Out, settings, dict, initialization settings for this algorithm
"""
settings = super().handleInput(spec)
settings['periods'] = spec.findFirst('periods').value
return settings

def characterize(self, signal, pivot, targets, settings, simultFit=True):
def fit(self, signal, pivot, targets, settings, simultFit=True):
"""
Determines the charactistics of the signal based on this algorithm.
@ In, signal, np.ndarray, time series with dims [time, target]
Expand All @@ -105,13 +108,11 @@ def characterize(self, signal, pivot, targets, settings, simultFit=True):
params = {}
for tg, target in enumerate(targets):
history = signal[:, tg] # TODO need to keep in sync with SyntheticSignal ROM!
mask = ~np.isnan(history) # some values might be NaN due to masking
if simultFit and cond < 30:
print(f'Fourier fitting condition number is {cond:1.1e} for "{target}". ',
' Calculating all Fourier coefficients at once.')
fourierEngine = sklearn.linear_model.LinearRegression(normalize=False)
fourierEngine.fit(fourierSignals, history)
intercept = fourierEngine.intercept_
coeffs = fourierEngine.coef_
intercept, coeffs = self._fitSignal(fourierSignals, history)
else:
print(f'Fourier fitting condition number is {cond:1.1e} for "{target}"! ',
'Calculating iteratively instead of all at once.')
Expand All @@ -124,10 +125,7 @@ def characterize(self, signal, pivot, targets, settings, simultFit=True):
coeffs = np.zeros(F2) # amplitude coeffs for sine, cosine
for fn in range(F2):
fSignal = fourierSignals[:,fn] # Fourier base signal for this waveform
eng = sklearn.linear_model.LinearRegression(normalize=False) # regressor
eng.fit(fSignal.reshape(H,1), signalToFit)
thisIntercept = eng.intercept_
thisCoeff = eng.coef_[0]
thisIntercept, thisCoeff = self._fitSignal(fSignal.reshape(H,1), signalToFit)
coeffs[fn] = thisCoeff
intercept += thisIntercept
# remove this signal from the signal to fit
Expand Down Expand Up @@ -160,6 +158,63 @@ def characterize(self, signal, pivot, targets, settings, simultFit=True):
# END for target in targets
return params

def _fitSignal(self, baseSignal, signalToFit):
"""
Utility for calculating least-squares approximation of Fourier coefficients
@ In, baseSignal, np.ndarray, signal composes of Fourier base(s)
@ In, signalToFit, np.ndarray, signal being detrended
@ Out, intercept, np.ndarray, intercept for the base Fourier signal
@ Out, coeffs, np.ndarray, coefficients of the various Fourier components
"""
fitResult = sm.OLS(signalToFit, sm.add_constant(baseSignal), missing='drop').fit()
intercept = fitResult.params[0]
coeffs = fitResult.params[1:]
return intercept, coeffs

def getResidual(self, initial, params, pivot, settings):
"""
Removes fitted Fourier signal from data
@ In, initial, np.array, original signal shaped [pivotValues, targets], targets MUST be in
same order as self.target
@ In, params, dict, training parameters as from self.characterize
@ In, pivot, np.array, time-like array values
@ In, settings, dict, additional settings specific to algorithm
@ Out, residual, np.array, reduced signal shaped [pivotValues, targets]
"""
synthetic = self.generate(params, pivot)
residual = initial - synthetic
return residual

def getComposite(self, initial, params, pivot, settings):
"""
Adds the Fourier signal back into initial signal
@ In, initial, np.array, original signal shaped [pivotValues, targets], targets MUST be in
same order as self.target
@ In, params, dict, training parameters as from self.characterize
@ In, pivot, np.array, time-like array values
@ In, settings, dict, additional settings specific to algorithm
@ Out, composite, np.array, resulting composite signal
"""
synthetic = self.generate(params, pivot)
residual = initial + synthetic
return residual

def generate(self, params, pivot):
"""
Generates a synthetic history from fitted parameters.
@ In, params, dict, characterization such as otained from self.characterize()
@ In, pivot, np.array(float), pivot parameter values
@ Out, synthetic, np.array(float), synthetic ARMA signal
"""
synthetic = np.zeros((len(pivot), len(params)))
for t, (target, data) in enumerate(params.items()):
synthetic[:, t] += data['intercept']
for period, coeffs in data['coeffs'].items():
C = coeffs['amplitude']
s = coeffs['phase']
synthetic[:, t] += mathUtils.evalFourier(period, C, s, pivot)
return synthetic

def getParamNames(self, settings):
"""
Return list of expected variable names based on the parameters
Expand Down Expand Up @@ -192,24 +247,6 @@ def getParamsAsVars(self, params):
rlz[f'{baseWave}__{stat}'] = value
return rlz


def generate(self, params, pivot, settings):
"""
Generates a synthetic history from fitted parameters.
@ In, params, dict, characterization such as otained from self.characterize()
@ In, pivot, np.array(float), pivot parameter values
@ In, settings, dict, additional settings specific to algorithm
@ Out, synthetic, np.array(float), synthetic ARMA signal
"""
synthetic = np.zeros((len(pivot), len(params)))
for t, (target, data) in enumerate(params.items()):
synthetic[:, t] += data['intercept']
for period, coeffs in data['coeffs'].items():
C = coeffs['amplitude']
s = coeffs['phase']
synthetic[:, t] += mathUtils.evalFourier(period, C, s, pivot)
return synthetic

def writeXML(self, writeTo, params):
"""
Allows the engine to put whatever it wants into an XML to print to file.
Expand Down
Loading

0 comments on commit 137d53f

Please sign in to comment.