From 12fc8b11d2c4960ac10325dc3d45060b6e285ef4 Mon Sep 17 00:00:00 2001 From: JonKing93 Date: Fri, 11 Sep 2020 17:24:19 -0700 Subject: [PATCH] Updated for alpha release 2 --- @dash/assertPositiveIntegers.m | 46 ++-- @dash/assertRealDefined.m | 46 ++++ @dash/assertScalarLogical.m | 17 ++ @dash/assertStrFlag.m | 17 +- @dash/assertStrList.m | 21 +- @dash/assertVectorTypeN.m | 32 +++ @dash/checkFileExists.m | 5 +- @dash/checkIndices.m | 52 +++++ @dash/checkStrsInList.m | 35 +++ @dash/collectField.m | 21 ++ @dash/dash.m | 36 ++-- @dash/messageList.m | 33 +++ @dash/parseInputs.m | 27 ++- @dash/permuteToOrder.m | 25 +++ @dash/preallocateStructs.m | 25 +++ @gridfile/add.m | 47 ++-- @gridfile/buildSourcesForFiles.m | 13 +- @gridfile/checkAllowedDims.m | 32 +-- @gridfile/checkMetadataField.m | 20 +- @gridfile/checkMetadataStructure.m | 15 +- @gridfile/defineMetadata.m | 7 +- @gridfile/expand.m | 4 +- @gridfile/gridfile.m | 9 +- @gridfile/hasDuplicateRows.m | 4 - @gridfile/info.m | 31 ++- @gridfile/load.m | 49 ++--- @gridfile/metadata.m | 4 +- @gridfile/new.m | 14 +- @gridfile/remove.m | 7 +- @gridfile/renameSources.m | 32 +-- @gridfile/repeatedLoad.m | 45 ++-- @gridfile/review.m | 47 +++- @gridfile/rewriteMetadata.m | 6 +- @gridfile/save.m | 3 +- @gridfile/sourceFilepath.m | 14 +- @gridfile/update.m | 6 +- @gridfile/updateMetadataField.m | 3 +- @stateVector/add.m | 67 ++++++ @stateVector/allowOverlap.m | 35 +++ @stateVector/append.m | 58 +++++ @stateVector/autoCouple.m | 35 +++ @stateVector/buildEnsemble.m | 238 +++++++++++++++++++++ @stateVector/checkVariableNames.m | 47 ++++ @stateVector/checkVariables.m | 35 +++ @stateVector/convertMetadata.m | 55 +++++ @stateVector/copy.m | 105 +++++++++ @stateVector/couple.m | 68 ++++++ @stateVector/design.m | 99 +++++++++ @stateVector/dimensions.m | 74 +++++++ @stateVector/errorTitle.m | 18 ++ @stateVector/extract.m | 26 +++ @stateVector/info.m | 108 ++++++++++ @stateVector/mean.m | 75 +++++++ @stateVector/notifyConsole.m | 19 ++ 
@stateVector/remove.m | 28 +++ @stateVector/rename.m | 20 ++ @stateVector/renameVariables.m | 35 +++ @stateVector/resetMeans.m | 42 ++++ @stateVector/resetMetadata.m | 42 ++++ @stateVector/sequence.m | 52 +++++ @stateVector/specifyMetadata.m | 32 +++ @stateVector/stateVector.m | 147 +++++++++++++ @stateVector/uncouple.m | 25 +++ @stateVector/updateCoupledVariables.m | 78 +++++++ @stateVector/variableNames.m | 39 ++++ @stateVector/weightedMean.m | 49 +++++ @stateVectorVariable/addIndices.m | 25 +++ @stateVectorVariable/assertAddIndices.m | 21 ++ @stateVectorVariable/buildEnsemble.m | 109 ++++++++++ @stateVectorVariable/checkDimensions.m | 49 +++++ @stateVectorVariable/checkGrid.m | 29 +++ @stateVectorVariable/convertMetadata.m | 69 ++++++ @stateVectorVariable/design.m | 152 +++++++++++++ @stateVectorVariable/dimMetadata.m | 74 +++++++ @stateVectorVariable/dimensions.m | 42 ++++ @stateVectorVariable/info.m | 173 +++++++++++++++ @stateVectorVariable/matchIndices.m | 30 +++ @stateVectorVariable/mean.m | 128 +++++++++++ @stateVectorVariable/parseInputCell.m | 30 +++ @stateVectorVariable/parseLogicalString.m | 53 +++++ @stateVectorVariable/removeOverlap.m | 84 ++++++++ @stateVectorVariable/rename.m | 18 ++ @stateVectorVariable/resetMeans.m | 36 ++++ @stateVectorVariable/resetMetadata.m | 29 +++ @stateVectorVariable/sequence.m | 88 ++++++++ @stateVectorVariable/specifyMetadata.m | 49 +++++ @stateVectorVariable/stateVectorVariable.m | 147 +++++++++++++ @stateVectorVariable/trim.m | 33 +++ @stateVectorVariable/weightedMean.m | 104 +++++++++ dataSource.m | 8 +- 90 files changed, 3919 insertions(+), 262 deletions(-) create mode 100644 @dash/assertRealDefined.m create mode 100644 @dash/assertScalarLogical.m create mode 100644 @dash/assertVectorTypeN.m create mode 100644 @dash/checkIndices.m create mode 100644 @dash/checkStrsInList.m create mode 100644 @dash/collectField.m create mode 100644 @dash/messageList.m create mode 100644 @dash/permuteToOrder.m create mode 100644 
@dash/preallocateStructs.m create mode 100644 @stateVector/add.m create mode 100644 @stateVector/allowOverlap.m create mode 100644 @stateVector/append.m create mode 100644 @stateVector/autoCouple.m create mode 100644 @stateVector/buildEnsemble.m create mode 100644 @stateVector/checkVariableNames.m create mode 100644 @stateVector/checkVariables.m create mode 100644 @stateVector/convertMetadata.m create mode 100644 @stateVector/copy.m create mode 100644 @stateVector/couple.m create mode 100644 @stateVector/design.m create mode 100644 @stateVector/dimensions.m create mode 100644 @stateVector/errorTitle.m create mode 100644 @stateVector/extract.m create mode 100644 @stateVector/info.m create mode 100644 @stateVector/mean.m create mode 100644 @stateVector/notifyConsole.m create mode 100644 @stateVector/remove.m create mode 100644 @stateVector/rename.m create mode 100644 @stateVector/renameVariables.m create mode 100644 @stateVector/resetMeans.m create mode 100644 @stateVector/resetMetadata.m create mode 100644 @stateVector/sequence.m create mode 100644 @stateVector/specifyMetadata.m create mode 100644 @stateVector/stateVector.m create mode 100644 @stateVector/uncouple.m create mode 100644 @stateVector/updateCoupledVariables.m create mode 100644 @stateVector/variableNames.m create mode 100644 @stateVector/weightedMean.m create mode 100644 @stateVectorVariable/addIndices.m create mode 100644 @stateVectorVariable/assertAddIndices.m create mode 100644 @stateVectorVariable/buildEnsemble.m create mode 100644 @stateVectorVariable/checkDimensions.m create mode 100644 @stateVectorVariable/checkGrid.m create mode 100644 @stateVectorVariable/convertMetadata.m create mode 100644 @stateVectorVariable/design.m create mode 100644 @stateVectorVariable/dimMetadata.m create mode 100644 @stateVectorVariable/dimensions.m create mode 100644 @stateVectorVariable/info.m create mode 100644 @stateVectorVariable/matchIndices.m create mode 100644 @stateVectorVariable/mean.m create mode 100644 
@stateVectorVariable/parseInputCell.m create mode 100644 @stateVectorVariable/parseLogicalString.m create mode 100644 @stateVectorVariable/removeOverlap.m create mode 100644 @stateVectorVariable/rename.m create mode 100644 @stateVectorVariable/resetMeans.m create mode 100644 @stateVectorVariable/resetMetadata.m create mode 100644 @stateVectorVariable/sequence.m create mode 100644 @stateVectorVariable/specifyMetadata.m create mode 100644 @stateVectorVariable/stateVectorVariable.m create mode 100644 @stateVectorVariable/trim.m create mode 100644 @stateVectorVariable/weightedMean.m diff --git a/@dash/assertPositiveIntegers.m b/@dash/assertPositiveIntegers.m index 7ee11a89..96acbb39 100644 --- a/@dash/assertPositiveIntegers.m +++ b/@dash/assertPositiveIntegers.m @@ -1,36 +1,46 @@ -function[] = assertPositiveIntegers(input, allowNaN, allowInf, name) +function[] = assertPositiveIntegers(input, name, allowNaN, allowInf) %% Checks that an input consists of positive integers. Optionally allows % NaN and Inf values. Returns customized error messages. % -% dash.assertPositiveIntegers(input, allowNaN, allowInf, name) +% dash.assertPositiveIntegers(input, name) +% Checks the input consists of positive integers. Does not allow NaN or Inf +% +% dash.assertPositiveIntegers(input, name, allowNaN, allowInf) +% Specify whether to allow NaN or Inf. % % ----- Inputs ----- % % input: The input being checked % -% allowNaN: A scalar logical. Whether to allow NaN values in the input. +% name: The name of the input. A string. Used for custom error messages. % -% allowInf: A scalar logical. Whether to allow Inf values in the input. +% allowNaN: A scalar logical that indicates whether to allow NaN values in +% the input (true) or not (false -- default). % -% name: The name of the input. Used for custom error messages. +% allowInf: A scalar logical that indicates whether to allow Inf values in +% the input (true) or not (false -- default). 
-% Process NaNs -if allowNaN - input(isnan(input)) = 1; -elseif any(isnan(input),'all') - error('%s may not contain NaN.', name); +% Defaults +if ~exist('allowNaN','var') || isempty(allowNaN) + allowNaN = false; +end +if ~exist('allowInf','var') || isempty(allowInf) + allowInf = false; end -% Process Inf -if allowInf - input(isinf(input)) = 1; -elseif any(isinf(input),'all') - error('%s may not contain Inf.', name); +% Require numeric +if ~isnumeric(input) + error('%s must be numeric', name); end -% Everything else -if ~isnumeric(input) || ~isreal(input) || any(input<1,'all') || any(mod(input,1)~=0,'all') - error('%s can only contain positive integers.', name); +% Process NaN and Inf +dash.assertRealDefined(input, name, allowNaN, allowInf); +input(isnan(input)) = 1; +input(isinf(input)) = 1; + +% Check for positive integers +if any(input<1,'all') || any(mod(input,1)~=0,'all') + error('%s must only contain positive integers.', name); end end \ No newline at end of file diff --git a/@dash/assertRealDefined.m b/@dash/assertRealDefined.m new file mode 100644 index 00000000..9b7b7452 --- /dev/null +++ b/@dash/assertRealDefined.m @@ -0,0 +1,46 @@ +function[] = assertRealDefined(input, name, allowNaN, allowInf, allowComplex) +%% Checks that an input is real, not NaN, and not Inf. Optionally allows +% NaN, Inf, or complex. Returns custom error messages. +% +% dash.assertRealDefined(input, name) +% Checks that an input is real, not NaN and not Inf. +% +% dash.assertRealDefined(input, name, allowNaN, allowInf, allowComplex) +% Optionally allow NaN, Inf, or complex. +% +% ----- Inputs ----- +% +% input: The input being checked +% +% name: The name of the input. A string. Used for custom error messages. 
+% +% allowNaN: Scalar logical indicating whether to allow NaN (true) or not +% (false -- default) +% +% allowInf: Scalar logical indicating whether to allow Inf (true) or not +% (false -- default) +% +% allowComplex: Scalar logical indicating whether to allow complex values +% (true) or not (false -- default) + +% Defaults +if ~exist('allowNaN','var') || isempty(allowNaN) + allowNaN = false; +end +if ~exist('allowInf','var') || isempty(allowInf) + allowInf = false; +end +if ~exist('allowComplex','var') || isempty(allowComplex) + allowComplex = false; +end + +% Check input +if ~allowNaN && any(isnan(input), 'all') + error('%s may not contain NaN', name); +elseif ~allowInf && any(isinf(input), 'all') + error('%s may not contain Inf', name); +elseif ~allowComplex && ~isreal(input) + error('%s may not contain complex (imaginary) values', name); +end + +end \ No newline at end of file diff --git a/@dash/assertScalarLogical.m b/@dash/assertScalarLogical.m new file mode 100644 index 00000000..7cf712f5 --- /dev/null +++ b/@dash/assertScalarLogical.m @@ -0,0 +1,17 @@ +function[] = assertScalarLogical(input, name) +%% Checks that an input is a scalar logical. Throws a custom error message +% if not. +% +% dash.assertScalarLogical(input, name) +% +% ----- Inputs ----- +% +% input: The input being checked +% +% name: The name of the input. A string. Used for error message. + +if ~isscalar(input) || ~islogical(input) + error('%s must be a scalar logical.', name); +end + +end \ No newline at end of file diff --git a/@dash/assertStrFlag.m b/@dash/assertStrFlag.m index 246fa006..ff9d77fb 100644 --- a/@dash/assertStrFlag.m +++ b/@dash/assertStrFlag.m @@ -1,17 +1,24 @@ -function[] = assertStrFlag( input, name ) +function[input] = assertStrFlag( input, name ) %% Checks that an input is a string flag. Returns a customized error -% message if not. +% message if not. Optionally returns input as a string data type. 
% -% dash.assertStrFlags( input, name ) +% dash.assertStrFlag( input, name ) +% +% input = dash.assertStrFlag(input, name) % % ----- Inputs ----- % % input: A variable being checked. % -% names: The name of the variables to use in error messages. A string. +% name: The name of the variable to use in the error message. A string. +% +% ----- Outputs ----- +% +% input: The input as a string data type. if ~dash.isstrflag(input) error('%s must be a string scalar or character row vector.',name); end - +input = string(input); + end \ No newline at end of file diff --git a/@dash/assertStrList.m b/@dash/assertStrList.m index d3c40edf..e45e197f 100644 --- a/@dash/assertStrList.m +++ b/@dash/assertStrList.m @@ -1,5 +1,24 @@ -function[] = assertStrList(input, name) +function[input] = assertStrList(input, name) +%% Checks that an input is a string list. Returns a customized error +% message if not. Optionally returns input as a string data type. +% +% dash.assertStrList(input, name) +% +% input = dash.assertStrList(input, name) +% +% ----- Inputs ----- +% +% input: The input being checked +% +% name: The name of a variable being check. A string. +% +% ----- Outputs ----- +% +% input: The input as a string data type. + if ~dash.isstrlist(input) error('%s must be a string vector or cellstring vector.', name); end +input = string(input); + end \ No newline at end of file diff --git a/@dash/assertVectorTypeN.m b/@dash/assertVectorTypeN.m new file mode 100644 index 00000000..e6e316f9 --- /dev/null +++ b/@dash/assertVectorTypeN.m @@ -0,0 +1,32 @@ +function[] = assertVectorTypeN( input, type, N, name ) +%% Checks that an input is a vector with length N. Optionally also checks +% the vector is a specific data type. Returns a customized error message if not. +% +% dash.assertVectorTypeN(input, [], N, name) +% Checks the input is a vector of length N. +% +% dash.assertVectorTypeN(input, type, [], name) +% Checks the input is a vector of a specific type. 
+% +% dash.assertVectorTypeN(input, type, N, name) +% Checks both type and length. +% +% ----- Inputs ----- +% +% input: The input being checked. +% +% type: The required data type. Use [] to not check the type. +% +% N: The required length of the vector. Use [] to not check the length. +% +% name: The name of the input. Used for custom error message. + +if ~isvector(input) + error('%s must be a vector.', name); +elseif ~isempty(N) && numel(input)~=N + error('%s must have %.f elements, but it has %.f elements instead.', name, N, numel(input)); +elseif ~isempty(type) && ~isa(input, type) + error('%s must be a %s vector, but it is a %s vector instead.', name, type, class(input)); +end + +end \ No newline at end of file diff --git a/@dash/checkFileExists.m b/@dash/checkFileExists.m index 8373459f..3a624490 100644 --- a/@dash/checkFileExists.m +++ b/@dash/checkFileExists.m @@ -33,7 +33,7 @@ % Throw error if the file doesn't exist if isempty(path) if ~exist - error('Could not find file %s. It may be misspelled or not on the active path.', file); + error("DASH:missingFile",'Could not find file %s. It may be misspelled or not on the active path.', file); end % Get the path string if off the active path. @@ -42,4 +42,7 @@ rmpath(fileparts(file)); end +% Use string internally +path = string(path); + end \ No newline at end of file diff --git a/@dash/checkIndices.m b/@dash/checkIndices.m new file mode 100644 index 00000000..4c8d866d --- /dev/null +++ b/@dash/checkIndices.m @@ -0,0 +1,52 @@ +function[indices] = checkIndices( indices, name, length, lengthName ) +%% Checks that an input is a set of indices. Indices may be a logical +% vector the length of a dimension, or a vector of linear indices. Linear +% indices may not exceed the dimension length. Returns custom error +% messages. Converts logical indices to linear indices. +% +% indices = dash.checkIndices( indices, name, length, lengthName ) +% +% ----- Inputs ----- +% +% indices: The indices being checked. 
+% +% name: The name of the indices. Used for custom error messages. +% +% length: The length of the array dimension. This is the maximum value +% for linear indices and the required length of logical indices. +% +% lengthName: The name of the length of the array dimension. A string. +% +% ----- Outputs ----- +% +% indices: Linear indices + +% Allow empty call +if ~isequal(indices, []) + + % Vector + if ~isvector(indices) + error('%s must be a vector.',name); + end + + % Logical indices + if islogical(indices) + if numel(indices)~=length + error('%s is a logical vector, but it is not %s (%.f).', name, lengthName, length); + end + indices = find(indices); + + % Numeric indices + elseif isnumeric(indices) + dash.assertPositiveIntegers(indices, name); + if max(indices) > length + error('%s has elements larger than %s (%.f).', name, lengthName, length); + end + + % Other types are not allowed + else + error('%s must either be logical or numeric.'); + end +end + +end diff --git a/@dash/checkStrsInList.m b/@dash/checkStrsInList.m new file mode 100644 index 00000000..5f570b65 --- /dev/null +++ b/@dash/checkStrsInList.m @@ -0,0 +1,35 @@ +function[k] = checkStrsInList(input, list, name, listName ) +%% Checks that an input is a set of strings that are all members of a list. +% Throws a custom error message if not. Returns the indices of the strings +% in the list. +% +% k = dash.checkStrsInList(input, list, name, listMessage) +% +% ----- Inputs ----- +% +% input: The input being checked +% +% list: A list of allowed strings. A string vector. +% +% name: The name of the input. A string +% +% listName: Name of the list. A string + +% Check the input is a string list +dash.assertStrList(input, name); +input = string(input); + +% Check all strings are allowed. Get their indices in the list. 
+[inList, k] = ismember(input, list); +if any(~inList) + bad = find(~inList,1); + + % Informative error message + badName = name; + if numel(input)>1 + badName = sprintf('Element %.f in %s (%s)', bad, name, input(bad)); + end + error('%s is not a %s. Allowed values are %s.', badName, listName, dash.messageList(list)); +end + +end \ No newline at end of file diff --git a/@dash/collectField.m b/@dash/collectField.m new file mode 100644 index 00000000..830cb1ef --- /dev/null +++ b/@dash/collectField.m @@ -0,0 +1,21 @@ +function[values] = collectField(s, field) +%% Collects the values in a field of a structure vector. +% +% values = dash.collectField(s, field) +% +% ----- Inputs ----- +% +% s: The structure vector +% +% field: The name of the field. A string scalar or character row vector. +% +% ----- Outputs ----- +% +% values: The values in the field. A cell vector with one element per +% structure in s. + +nEls = numel(s); +values = cell(nEls, 1); +[values{:}] = deal(s.(field)); + +end \ No newline at end of file diff --git a/@dash/dash.m b/@dash/dash.m index 85a5d814..d25879ed 100644 --- a/@dash/dash.m +++ b/@dash/dash.m @@ -4,30 +4,38 @@ methods (Static) - % Global data for dimension names + % Misc names = dimensionNames; + varargout = parseInputs(inArgs, flags, defaults, nPrev); + convertToV7_3(filename); + X = permuteToOrder(X, order, nDims); - % Files and paths + % Structures + [s, inputs] = preallocateStructs(fields, siz); + values = collectField(s, field); + + % File paths path = checkFileExists(file); path = unixStylePath(path); path = relativePath(toFile, fromFolder); - % Input error checks + % Strings and string lists tf = isstrflag( input ); tf = isstrlist( input ); - tf = isrelative( name ); - assertStrFlag(input, name); - assertStrList(input, name); - assertNumericVectorN(input, N, name); + input = assertStrFlag(input, name); + input = assertStrList(input, name); + k = checkStrsInList(input, list, name, message); + str = messageList(list); + + % Input 
assertions + assertScalarLogical(input, name); + assertRealDefined(input, name, allowNaN, allowInf, allowComplex); + assertVectorTypeN(input, type, N, name); assertPositiveIntegers(input, allowNaN, allowInf, name); - str = errorStringList(strings); - varargout = parseInputs(inArgs, flags, defaults, nPrev); - - % Indices and start, count, stride. - indices = equallySpacedIndices(indices); - % File formats - convertToV7_3(filename); + % Indices + indices = checkIndices(indices, name, dimLength, dimName); + indices = equallySpacedIndices(indices); end end diff --git a/@dash/messageList.m b/@dash/messageList.m new file mode 100644 index 00000000..94ce22ff --- /dev/null +++ b/@dash/messageList.m @@ -0,0 +1,33 @@ +function[str] = messageList( list ) +% Returns a numeric of string list as a formatted string for use in +% messages. +% +% str = dash.messageList(list) +% +% ----- Inputs ----- +% +% list: The list being formated. Numeric or string. +% +% ----- Outputs ----- +% +% str: A formatted output string + +% Formatting for sprintf +if isstring(list) + format = {'"%s"', '"%s" and "%s"', '"%s", ', 'and "%s"'}; +elseif isnumeric(list) + format = {'%.f','%.f and %.f', '%.f, ', 'and %.f'}; +end + +% Create the outout string +if numel(list)==0 + str = ''; +elseif numel(list)==1 + str = sprintf(format{1}, list); +elseif numel(list) == 2 + str = sprintf(format{2}, list(1), list(2)); +else + str = [sprintf(format{3}, list(1:end-1)), sprintf(format{4}, list(end))]; +end + +end \ No newline at end of file diff --git a/@dash/parseInputs.m b/@dash/parseInputs.m index 15547143..c158b2d4 100644 --- a/@dash/parseInputs.m +++ b/@dash/parseInputs.m @@ -1,7 +1,7 @@ function[varargout] = parseInputs( inArgs, flags, defaults, nPrev ) %% Parses inputs for flag, value input pairs. % -% [values] = dash.parseInputs( inArgs, flags, defaults, switches ) +% [values] = dash.parseInputs( inArgs, flags, defaults, nPrev ) % % ----- Inputs ----- % @@ -20,9 +20,11 @@ % values: The parsed values. 
A cell vector. Has one element for each flag. % Error check the inputs -assertCellVector(inArgs, "inArgs"); +if ~isempty(inArgs) + dash.assertVectorTypeN(inArgs, 'cell', [], 'inArgs'); +end dash.assertStrList(flags, "flags"); -assertCellVector(defaults, "defaults"); +dash.assertVectorTypeN(defaults, 'cell', [], 'defaults'); flags = string(flags); nFlags = numel(flags); @@ -41,13 +43,14 @@ if ~isempty(inArgs) setValue = false(nFlags, 1); - % Check that the input flags are strings and recognized + % Check that the input flags are strings and recognized. for k = 1:2:numel(inArgs) - dash.assertStrFlag(inArgs{k}, sprintf('Input %.f', k+nPrev)); - f = strcmp(inArgs{k}, flags); - if sum(f)==0 - error('Input %.f is not a recognized flag. Allowed flags are %s.', k+nPrev, dash.errorStringList(flags)); - elseif setValue(f) + name = sprintf('Input %.f', k+nPrev); + dash.assertStrFlag( inArgs{k}, name ); + f = dash.checkStrsInList( inArgs{k}, flags, name, 'recognized flag'); + + % Prevent duplicates + if setValue(f) error('The %s flag is set multiple times.', flags(f)); end @@ -57,10 +60,4 @@ end end -end - -function[] = assertCellVector(input, name) - if ~iscell(input) || ~isvector(name) - error('%s must be a cell vector.', name); - end end \ No newline at end of file diff --git a/@dash/permuteToOrder.m b/@dash/permuteToOrder.m new file mode 100644 index 00000000..0577f3bb --- /dev/null +++ b/@dash/permuteToOrder.m @@ -0,0 +1,25 @@ +function[X] = permuteToOrder(X, order, nDims) +%% Permutes an array so that the its dimensions are in a specified order. +% +% X = dash.permuteToOrder(X, order, nDims) +% +% ----- Inputs ----- +% +% X: An array +% +% order: Specifies the order of the current dimensions in the final +% permuted array. First element is the desired location of the current +% first dimension, etc. 
+% +% nDims: The number of dimensions in the final array +% +% ----- Outputs ----- +% +% X: The final permuted array + +dims = 1:nDims; +[~, reorder] = ismember(dims, order); +reorder(reorder==0) = dims(~ismember(dims, reorder)); +X = permute(X, reorder); + +end \ No newline at end of file diff --git a/@dash/preallocateStructs.m b/@dash/preallocateStructs.m new file mode 100644 index 00000000..c03e64d3 --- /dev/null +++ b/@dash/preallocateStructs.m @@ -0,0 +1,25 @@ +function[s, inputs] = preallocateStructs( fields, siz ) +%% Preallocates a struct array. Fills all fields with empty arrays. +% +% [s, inputs] = dash.preallocateStruct(fields, siz) +% +% ----- Inputs ----- +% +% fields: A cell vector. Each element contains the name of a field in the +% structures. +% +% siz: The size of the preallocated struct array +% +% ----- Outputs ----- +% +% s: The struct array +% +% inputs: A cell vector that can be used to pass inputs to create a +% struct. Elements 1:2:end-2 are field names. + +nFields = numel(fields); +inputs = repmat( {[]}, [1, nFields*2] ); +inputs(1:2:end) = fields; +s = repmat( struct(inputs{:}), siz ); + +end \ No newline at end of file diff --git a/@gridfile/add.m b/@gridfile/add.m index 56823081..c6992d0c 100644 --- a/@gridfile/add.m +++ b/@gridfile/add.m @@ -22,10 +22,10 @@ % Y = aX + b to all values. See the details of the "convert" input. If unset, % does not apply a linear transformation to loaded data. % -% obj.add( ..., 'relativePath', relative ) -% Specify whether to save the data source file name as a path relative to -% the .grid file or as an absolute path. If unspecified, uses the relative -% path. +% obj.add( ..., 'absolutePath', absolute ) +% Specify whether to save the data source file name as an absolute path or +% as a path relative to the .grid file. If unspecified, uses a relative +% path when possible. % % ----- Inputs ----- % @@ -45,7 +45,7 @@ % % meta: The dimensional metadata structure for the data in the source file. 
% See gridfile.defineMetadata. Must include metadata for all -% non-singleton dimensions in the .grid file (see ) and for all +% non-singleton dimensions in the .grid file and for all % non-trailing dimensions in the source file. The number of rows in each % metadata field must match the length of the dimension in the source % file. Each metadata field must exactly match a contiguous sequence of @@ -64,30 +64,24 @@ % multiplicative constant (a). The second element specifieds the % additive constant (b). % -% relative: A scalar logical indicating whether to save data source file -% names as a path relative to the .grid file (true), or as an absolute -% path (false). +% absolute: A scalar logical indicating whether to save data source file +% names as an absolute path (true), or as a path relative to the .grid +% file (false). Default is false. % Update the gridfile object in case the file was changed. obj.update; -% Parse and error check the optional inputs (fill, range, convert) -[fill, range, convert, relative] = dash.parseInputs( varargin, {'fill','validRange','convert','relative'}, ... - {NaN, [-Inf, Inf], [1 0], true}, 5 ); -if ~isnumeric(fill) || ~isscalar(fill) - error('fill must be a numeric scalar.'); -elseif ~isvector(range) || numel(range)~=2 || ~isnumeric(range) - error('range must be a numeric vector with two elements.'); -elseif ~isreal(range) || any(isnan(range)) - error('range may not contain contain complex values or NaN.'); -elseif range(1) > range(2) +% Parse and error check the optional inputs (fill, range, convert, absolute) +[fill, range, convert, absolute] = dash.parseInputs( varargin, {'fill','validRange','convert','absolutePath'}, ... 
+ {NaN, [-Inf, Inf], [1 0], false}, 5 ); +dash.assertScalarLogical(absolute, 'absolute'); +dash.assertVectorTypeN(fill, 'numeric', 1, 'fill'); +dash.assertVectorTypeN(convert, 'numeric', 2, 'convert'); +dash.assertRealDefined(convert, 'convert'); +dash.assertVectorTypeN(range, 'numeric', 2, 'range'); +dash.assertRealDefined(range, 'range', false, true); +if range(1) > range(2) error('The first element of range cannot be larger than the second element.'); -elseif ~isvector(range) || ~isnumeric(convert) || numel(convert)~=2 - error('convert must be a numeric vector with two elements.'); -elseif ~isreal(convert) || any(isnan(convert)) || any(isinf(convert)) - error('convert may not contain complex values, NaN, or Inf.'); -elseif ~isscalar(relative) || ~islogical(relative) - error('relative must be a scalar logical.'); end % Create the dataSource object. This will error check type, file, var, and @@ -108,7 +102,7 @@ % Error check the metadata. Require values for non-singleton grid % dimensions and non-trailing dimensions in the source data. 
-gridfile.checkMetadataStructure(meta, obj.dims(obj.isdefined), "dimensions with defined metadata in the .grid file"); +meta = gridfile.checkMetadataStructure(meta, obj.dims(obj.isdefined), "dimensions with defined metadata in the .grid file"); metaFields = string(fields(meta)); gridRequired = obj.dims( obj.size~=1 ); sourceRequired = source.mergedDims(1:ts1-1); @@ -163,7 +157,7 @@ % Convert the dataSource object into a structure of primitives and % implement the desired filepath style source = gridfile.convertSourceToPrimitives(source); -source.file = obj.sourceFilepath(source.file, relative); +source.file = obj.sourceFilepath(source.file, absolute); % Preallocate the length of each of the primitive fields sourceFields = fields(obj.source); @@ -191,6 +185,7 @@ % Update the other source variables and save obj.fieldLength = cat(1, obj.fieldLength, fieldLength); obj.dimLimit = cat(3, obj.dimLimit, dimLimit); +obj.absolutePath = cat(1, obj.absolutePath, absolute); obj.save; end \ No newline at end of file diff --git a/@gridfile/buildSourcesForFiles.m b/@gridfile/buildSourcesForFiles.m index a4c62044..59f387b5 100644 --- a/@gridfile/buildSourcesForFiles.m +++ b/@gridfile/buildSourcesForFiles.m @@ -30,9 +30,18 @@ unmerge = textscan(char(dims(s)), '%s', 'Delimiter', ','); unmerge = string( unmerge{:}'); - % Build the data source - sources{s} = dataSource.new( type(s), filenames(s), var(s), unmerge, ... + % Build the data source. + try + sources{s} = dataSource.new( type(s), filenames(s), var(s), unmerge, ... fill{s}, range{s}, convert{s} ); + + % Provide extra error information if the data source file is missing + catch ME + if strcmp(ME.identifier, "DASH:missingFile") + error('Cannot find data source file "%s". It may have been moved, renamed, or deleted. 
If the file was moved or renamed, see "gridfile.renameSources" to update the data source file path.', filenames(s)); + end + rethrow(ME); + end end end \ No newline at end of file diff --git a/@gridfile/checkAllowedDims.m b/@gridfile/checkAllowedDims.m index 2c26836a..77f8dd95 100644 --- a/@gridfile/checkAllowedDims.m +++ b/@gridfile/checkAllowedDims.m @@ -19,30 +19,18 @@ end dims = string(dims); -% Check that the dims are all allowed names -gridDims = obj.dims; +% Get the list of allowed dimensions and its name for error messages. +[~,name,ext] = fileparts(obj.file); +filename = strcat(name, ext); if requireDefined - gridDims = gridDims(obj.isdefined); + allowed = obj.dims(obj.isdefined); + allowedName = sprintf('dimension with defined metadata in .grid file %s', filename); +else + allowed = obj.dims; + allowedName = sprintf('dimension recognized by .grid file %s', filename); end -allowed = ismember(dims, gridDims); -% Build an error message if not. -if any(~allowed) - bad = find(~allowed,1); - - % Single input dim, use the name. Array input, include the list element - id = sprintf('%s', dims); - if numel(dims)>1 - id = sprintf('Element %.f in dims (%s)', bad, dims(bad)); - end - - % Specify whether dim must be recognized or defined - require = ["recognized by", "Recognized dimensions"]; - if requireDefined - require = ["with defined metadata in", "Dimensions with define metadata"]; - end - - error('%s is not a dimension %s .grid file %s. %s are %s.', id, require(1), obj.file, require(2), dash.errorStringList(gridDims)); -end +% Check the dimensions are allowed +dash.checkStrsInList(dims, allowed, 'dims', allowedName); end \ No newline at end of file diff --git a/@gridfile/checkMetadataField.m b/@gridfile/checkMetadataField.m index 30ffc7d5..ad28f47d 100644 --- a/@gridfile/checkMetadataField.m +++ b/@gridfile/checkMetadataField.m @@ -1,7 +1,8 @@ -function[] = checkMetadataField( meta, dim ) -%% Error checks the metadata for a grid dimension. 
+function[meta] = checkMetadataField( meta, dim ) +%% Error checks the metadata for a grid dimension. Converts cellstring to +% string. % -% gridfile.checkMetadataField(meta, dim) +% meta = gridfile.checkMetadataField(meta, dim) % % ----- Input ----- % @@ -9,6 +10,10 @@ % % dim: The name of the dimension associated with the metadata field. (Used % for error messages.) +% +% ----- Outputs ----- +% +% meta: The metadata field. Cellstrings converted to string. % Type if ~isnumeric(meta) && ~islogical(meta) && ~ischar(meta) && ... @@ -22,13 +27,16 @@ % Illegal elements elseif isnumeric(meta) && any(isnan(meta(:))) error('The %s metadata contains NaN elements.', dim ); -elseif isnumeric(meta) && any(isinf(meta(:))) - error('The %s metadata contains Inf elements.', dim ); elseif isdatetime(meta) && any( isnat(meta(:)) ) error('The %s metadata contains NaT elements.', dim ); end -% Duplicate rows. Convert cellstring to string for unique with rows option +% Convert cellstring to string +if iscellstr(meta) %#ok + meta = string(meta); +end + +% Check there are no duplicate rows. if gridfile.hasDuplicateRows(meta) error('The %s metadata contains duplicate rows.', dim); end diff --git a/@gridfile/checkMetadataStructure.m b/@gridfile/checkMetadataStructure.m index 19e578ff..6541731a 100644 --- a/@gridfile/checkMetadataStructure.m +++ b/@gridfile/checkMetadataStructure.m @@ -1,7 +1,8 @@ -function[] = checkMetadataStructure( meta, dims, errorString ) -%% Checks that a dimensional metadata structure is valid. +function[meta] = checkMetadataStructure( meta, dims, errorString ) +%% Checks that a dimensional metadata structure is valid. Converts any +% cellstring fields to string. % -% gridfile.checkMetadataStructure( meta, dims, errorString ) +% meta = gridfile.checkMetadataStructure( meta, dims, errorString ) % % ----- Inputs ----- % @@ -11,6 +12,10 @@ % % errorString: An identifier for the allowed dimension names for use in an % error message. 
+% +% ----- Outputs ----- +% +% meta: The metadata structure with cellstring metadata converted to string. % Scalar struct if ~isscalar(meta) || ~isstruct(meta) @@ -21,13 +26,13 @@ metaFields = string(fields(meta)); allowed = ismember(metaFields, dims); if any( ~allowed ) - error('Only %s (%s) are allowed as field names in meta.', errorString, dash.errorStringList(dims) ); + error('Only %s (%s) are allowed as field names in meta.', errorString, dash.messageList(dims) ); end % Metadata values for d = 1:numel(metaFields) name = metaFields(d); - gridfile.checkMetadataField( meta.(name), name ); + meta.(name) = gridfile.checkMetadataField( meta.(name), name ); end end \ No newline at end of file diff --git a/@gridfile/defineMetadata.m b/@gridfile/defineMetadata.m index 67c6e56e..2e8300f2 100644 --- a/@gridfile/defineMetadata.m +++ b/@gridfile/defineMetadata.m @@ -13,7 +13,7 @@ % metaN: Metadata for the Nth specified dimension. A numeric, logical, % char, string, cellstring, or datetime matrix. Each row is treated % as the metadata for one dimension element. Each row must be unique -% and cannot contain NaN, Inf, or NaT elements. Cellstring metadata +% and cannot contain NaN or NaT elements. Cellstring metadata % will be converted into the "string" type. % % ----- Outputs ----- @@ -43,7 +43,7 @@ for v = 2:2:nargin dim = varargin{v-1}; value = varargin{v}; - gridfile.checkMetadataField(value, dim); + value = gridfile.checkMetadataField(value, dim); % Warn user if metadata is a row vector. 
(They probably want a column) if isrow(value) && ~isscalar(value) @@ -51,9 +51,6 @@ end % Add to the output metadata structure - if iscellstr(value) %#ok - value = string(value); - end meta.(varargin{v-1}) = value; end diff --git a/@gridfile/expand.m b/@gridfile/expand.m index c104c6ff..3ff9300a 100644 --- a/@gridfile/expand.m +++ b/@gridfile/expand.m @@ -22,9 +22,9 @@ obj.update; % Error check -dash.assertStrFlag(dim, "dim"); +dim = dash.assertStrFlag(dim, "dim"); obj.checkAllowedDims(dim, true); -obj.checkMetadataField(meta, dim); +meta = obj.checkMetadataField(meta, dim); % Get the old metadata oldMeta = obj.meta.(dim); diff --git a/@gridfile/gridfile.m b/@gridfile/gridfile.m index b32ea4c3..4a4b6bd1 100644 --- a/@gridfile/gridfile.m +++ b/@gridfile/gridfile.m @@ -8,7 +8,6 @@ % To create a new gridfile object, use: % obj = gridfile(filename) % - % *** Essential *** % gridFile Methods: % defineMetadata - Defines metadata for a .grid file or data source % new - Initializes a new .grid file. @@ -39,6 +38,7 @@ fieldLength; % The length of primitive arrays for the source fields maxLength; % The length of the padded primitive arrays in the .grid file dimLimit; % The index limits of each data source in each dimension (nDim x 2 x nSource) + absolutePath; % Whether to store a data source file name exclusively as an absolute path end % Global configuration. @@ -48,10 +48,9 @@ % Static utilities methods (Static) - checkMetadataField( meta, dim ); - checkMetadataStructure( meta, dims, errorString ); + meta = checkMetadataField( meta, dim ); + meta = checkMetadataStructure( meta, dims, errorString ); tf = hasDuplicateRows(meta); - [meta, siz] = processMetadata(meta); source = convertSourceToPrimitives(source); dims = commaDelimitedDims(dims); @@ -122,7 +121,7 @@ % obj: A gridfile object for the specified .grid file. 
% Check the input is a file name - dash.assertStrFlag(file, "file"); + file = dash.assertStrFlag(file, "file"); obj.file = dash.checkFileExists(file); % Fill the fields diff --git a/@gridfile/hasDuplicateRows.m b/@gridfile/hasDuplicateRows.m index 29b9a062..ea1eb275 100644 --- a/@gridfile/hasDuplicateRows.m +++ b/@gridfile/hasDuplicateRows.m @@ -13,10 +13,6 @@ % (true) or not (false) tf = false; - -if iscellstr(meta) %#ok - meta = string(meta); -end if size(meta,1) ~= size(unique(meta,'rows'),1) tf = true; end diff --git a/@gridfile/info.m b/@gridfile/info.m index 48b7e4bc..8b2a6b7e 100644 --- a/@gridfile/info.m +++ b/@gridfile/info.m @@ -20,8 +20,9 @@ % % ----- Inputs ----- % -% s: A vector of linear indices. Cannot exceed the number of data sources -% managed by the .grid file. +% s: The indices of specific data sources within the .grid file. Either a +% vector of linear indices or a logical vector with one element per data +% source. % % filenames: A list of data source filenames. A string vector or cellstring % vector. Must include the file extension. Ignores the file path. @@ -56,15 +57,9 @@ index = unique([index, find(match)]); end -% If numeric, check for postive integers no higher than nSource -elseif isnumeric(sources) - dash.assertPositiveIntegers(sources, false, false, "s"); - if any(sources>nSource) - error('The largest element in s (%.f) is greater than the number of data sources in the .grid file (%.f)', max(sources), nSource); - end - index = sources; - -% Anything else throw error +% Check indices are valid. 
Throw error for anything else +elseif isnumeric(sources) || islogical(sources) + index = dash.checkIndices(sources, 's', nSource, 'the number of data sources in the .grid file'); else error('The first input may be ''all'', a list of file names, or a set of linear indices.'); end @@ -94,9 +89,7 @@ % Preallocate source structure nSource = numel(index); sourceFields = {"file","variable","dimensions","size","metadata","fillValue","validRange","linearTransformation"}; -pre = repmat( {[]}, [1, numel(sourceFields)*2]); -pre(1:2:end) = sourceFields; -sourceInfo = repmat(struct(pre{:}), [nSource, 1]); +[sourceInfo, inputs] = dash.preallocateStructs(sourceFields, [nSource, 1]); % Source information sources = obj.buildSources(index); @@ -105,16 +98,16 @@ sourceDims = [sources{s}.mergedDims, dims(singleton)]; sourceSize = [sources{s}.mergedSize, ones(1,(numel(singleton)))]; for d = 1:nDim - limit = obj.dimLimit(d,1,index(s)) : obj.dimLimit(d,2,index(s)); + k = find(strcmp(dims(d), obj.dims)); + limit = obj.dimLimit(k,1,index(s)) : obj.dimLimit(k,2,index(s)); sourceMeta.(dims(d)) = obj.meta.(dims(d))(limit,:); end % Source output structure if nargout~=0 - input = pre; - input(2:2:end) = {sources{s}.file, sources{s}.var, sourceDims, sourceSize, ... + inputs(2:2:end) = {sources{s}.file, sources{s}.var, sourceDims, sourceSize, ... 
sourceMeta, sources{s}.fill, sources{s}.range, sources{s}.convert}; - sourceInfo(s) = struct(input{:}); + sourceInfo(s) = struct(inputs{:}); % Print source to console else @@ -127,7 +120,7 @@ fprintf('The valid range is %s to %s.\n', num2str(sources{s}.range(1)), num2str(sources{s}.range(2))); end if ~isequal(sources{s}.convert, [1 0]) - fprintf('The data will be linearly transformed via: Y = %sX * %s\n', num2str(sources{s}.convert(1)), num2str(sources{s}.convert(2)) ); + fprintf('The data will be linearly transformed via: Y = %s * X + %s\n', num2str(sources{s}.convert(1)), num2str(sources{s}.convert(2)) ); end dimStr = sprintf('%s x ', sourceDims); fprintf('%s is (%s\b\b\b).\n', sources{s}.var, dimStr); diff --git a/@gridfile/load.m b/@gridfile/load.m index 285753de..24cd753a 100644 --- a/@gridfile/load.m +++ b/@gridfile/load.m @@ -28,8 +28,7 @@ % ----- Inputs ----- % % dims: A list of dimension names for which additional load arguments are -% being specified. A string vector or cellstring vector. Only dimensions -% with defined metadata in the .grid file are allowed. Any dimensions +% being specified. A string vector or cellstring vector. Any dimensions % not listed in dims will have all elements loaded. % % indices: A cell vector. Must have the same number of elements as dims. 
@@ -94,8 +93,8 @@ end % Error check the dimensions -dash.assertStrList(dims, "dims"); -obj.checkAllowedDims(dims, true); +dims = dash.assertStrList(dims, "dims"); +obj.checkAllowedDims(dims, false); if numel(dims) < numel(unique(dims)) error('dims contains duplicate names.'); end @@ -107,14 +106,13 @@ if ~haveIndices inputIndices = cell(1, nInputDims); - % Error check inputs + % Error check start, count, stride inputs input = {start, count, stride}; name = ["start","count","stride"]; allowInf = [false true false]; - for i = 1:numel(input) - dash.assertNumericVectorN( input{i}, nInputDims, name(i) ); - dash.assertPositiveIntegers( input{i}, false, allowInf(i), name(i) ); + dash.assertVectorTypeN( input{i}, 'numeric', nInputDims, name(i) ); + dash.assertPositiveIntegers( input{i}, name(i), false, allowInf(i) ); if any( input{i}>obj.size(inputOrder) & ~isinf(input{i}) ) bad = find(input{i}>obj.size(inputOrder),1); error('Element %.f of %s (%.f) is larger than the length of the %s dimension (%.f)', bad, name(i), start(bad), obj.dims(inputOrder(bad)), obj.size(inputOrder(bad)) ); @@ -131,11 +129,9 @@ inputIndices{d} = start(d):stride(d):stop; end -% If the user specified the indices, error check -else - if ~isvector(inputIndices) || numel(inputIndices) ~= nInputDims - error('indices must be a vector with %.f elements.', nInputDims); - end +% User specified indices. Error check cell +else + dash.assertVectorTypeN(inputIndices, 'cell', nInputDims, 'indices'); % Default for empty indices for d = 1:nInputDims @@ -143,29 +139,10 @@ inputIndices{d} = 1:obj.size(inputOrder(d)); end - % Error check - if ~isvector(inputIndices{d}) - error('Element %.f of indices must be a vector.', d); - end - - % Error check logical indices. Convert to linear. 
- if islogical(inputIndices{d}) - if numel(inputIndices{d})~=obj.size(inputOrder(d)) - error('Element %.f of indices is a logical vector, but it is not the length of the %s dimension (%.f)', d, obj.dims(inputOrder(d)), obj.size(inputOrder(d)) ); - end - inputIndices{d} = find(inputIndices{d}); - - % Error check linear indices. - elseif isnumeric(inputIndices{d}) - dash.assertPositiveIntegers(inputIndices{d}, false, false, sprintf('Element %.f of indices',d)); - if max(inputIndices{d})>obj.size(inputOrder(d)) - error('Element %.f of indices specifies values up to %.f, which is larger than the length of the %s dimension (%.f)', find(inputIndices{d}==max(inputIndices{d},1)), max(inputIndices{d}), obj.dims(inputOrder(d)), obj.size(inputOrder(d)) ); - end - - % Other types are not allowed - else - error('Element %.f of indices must be a logical or numeric vector.', d); - end + % Error check indices for individual dimensions + name = sprintf('Element %.f of indices', d); + lengthName = sprintf('the length of the %s dimension', obj.dims(inputOrder(d))); + inputIndices{d} = dash.checkIndices(inputIndices{d}, name, obj.size(inputOrder(d)), lengthName ); end end diff --git a/@gridfile/metadata.m b/@gridfile/metadata.m index 62d2cb4a..15dfc6ee 100644 --- a/@gridfile/metadata.m +++ b/@gridfile/metadata.m @@ -22,9 +22,7 @@ if ~exist('includeUndefined','var') || isempty(includeUndefined) includeUndefined = false; end -if ~isscalar(includeUndefined) || ~islogical(includeUndefined) - error('includeUndefined must be a scalar logical.'); -end +dash.assertScalarLogical(includeUndefined, 'includeUndefined'); % Extract the metadata meta = obj.meta; diff --git a/@gridfile/new.m b/@gridfile/new.m index 3a4fd730..1fb74369 100644 --- a/@gridfile/new.m +++ b/@gridfile/new.m @@ -41,13 +41,12 @@ end % Error check -dash.assertStrFlag( filename, "filename" ); +filename = dash.assertStrFlag( filename, "filename" ); if ~isempty(attributes) && (~isstruct(attributes) || ~isscalar(attributes)) 
error('attributes must be a scalar struct.'); -elseif ~islogical(overwrite) || ~isscalar(overwrite) - error('overwrite must be a scalar logical.'); end -gridfile.checkMetadataStructure( meta, dash.dimensionNames, "recognized dimension names" ); +dash.assertScalarLogical(overwrite, 'overwrite'); +meta = gridfile.checkMetadataStructure( meta, dash.dimensionNames, "recognized dimension names" ); % Ensure the file name has a .grid extension filename = char( filename ); @@ -79,7 +78,8 @@ % dimensions with undefined metadata. for d = 1:nDim if isfield(meta, dims(d)) - [metadata.(dims(d)), gridSize(d)] = gridfile.processMetadata(meta.(dims(d))); + metadata.(dims(d)) = meta.(dims(d)); + gridSize(d) = size(meta.(dims(d)), 1); isdefined(d) = true; else metadata.(dims(d)) = NaN; @@ -100,6 +100,7 @@ fieldLength = NaN(0, nField); maxLength = zeros(1, nField); dimLimit = NaN(nDim, 2, 0); +absolutePath = false(0,1); % Create the initial .grid file. % dims % The internal dimension order in the .grid file @@ -110,8 +111,9 @@ % fieldLength % Length of the primitive arrays for each source % maxLength % Length of the padded primitive arrays in the .grid file % dimLimit % The index limits of each data source along each dimension (nDim x 2 x nSource) +% absolutePath % Whether to store a data source file path exclusively as an absolute path save( filename, '-mat', 'dims', 'gridSize', 'isdefined', 'metadata', ... - 'source', 'fieldLength', 'maxLength', 'dimLimit' ); + 'source', 'fieldLength', 'maxLength', 'dimLimit', 'absolutePath' ); % Return grid object as output grid = gridfile( filename ); diff --git a/@gridfile/remove.m b/@gridfile/remove.m index e7741e81..6c567de7 100644 --- a/@gridfile/remove.m +++ b/@gridfile/remove.m @@ -23,10 +23,10 @@ var = []; end -% Error check -dash.assertStrFlag(file, "file"); +% Error check. 
Use string internally +file = dash.assertStrFlag(file, "file"); if ~isempty(var) - dash.assertStrFlag(var, "var"); + var = dash.assertStrFlag(var, "var"); end % Determine which sources match the file name @@ -49,6 +49,7 @@ obj.dimLimit(:,:,remove) = []; obj.fieldLength(remove,:) = []; obj.maxLength = max(obj.fieldLength,[],1); +obj.absolutePath(remove,:) = []; % Correct for empty max length sourceFields = fields(obj.source); diff --git a/@gridfile/renameSources.m b/@gridfile/renameSources.m index 2a778d1a..3eb89bdc 100644 --- a/@gridfile/renameSources.m +++ b/@gridfile/renameSources.m @@ -1,4 +1,4 @@ -function[] = renameSources( obj, name, newname, relativePath ) +function[] = renameSources( obj, name, newname, absolutePath ) %% Changes the file name associated with data sources to a new name. Useful % if data files are moved to a new location after being added to a .grid % file. @@ -21,7 +21,7 @@ % name and updates the path. If an element includes a partial file path or % just a file name, searches the active path for a matching file. % -% obj.renameSources( name, newname, relativePath ) +% obj.renameSources( name, newname, absolutePath ) % Specify whether to save new file names as absolute paths or as paths % relative to the .grid file location. If unspecified, uses whichever style % each data source used previously. @@ -36,9 +36,10 @@ % file name (including path), just file name, or an empty string. All % file names must include the extension. % -% relativePath: A scalar logical vector. Must have one element for each +% absolutePath: A logical vector. Must have one element for each % element of "name". True elements indicate that the path for the file -% should be saved as a relative path. If false, saves an absolute path. +% should be saved as an absolute path. If false, saves the path relative +% to the .grid file. 
% Update the grid object in case the file changed obj.update @@ -49,8 +50,7 @@ name = obj.collectFullPaths(1:nSource); name(isfile(name)) = []; end -dash.assertStrList(name, "name"); -name = string(name); +name = dash.assertStrList(name, "name"); % Get the data sources associated with each file name. nFile = numel(name); @@ -66,22 +66,21 @@ if ~exist('newname','var') || isempty(newname) newname = strings(nFile,1); end -dash.assertStrList(newname,"newname") -newname = string(newname); +newname = dash.assertStrList(newname,"newname"); if numel(newname) ~= numel(name) error('newname must have one element for each element in name (%.f), but newname currently has %.f elements.', numel(name), numel(newname)); end % Default and error checking for relativePath -if ~exist('relativePath','var') || isempty(relativePath) +if ~exist('absolutePath','var') || isempty(absolutePath) [row, col] = find(fileSources); [~, first] = unique(col); - path = char(obj.collectPrimitives("file", row(first))); - relativePath = path(:,1)=='.'; -elseif ~isvector(relativePath) || ~islogical(relativePath) || numel(relativePath)~=numel(name) - error('relativePath must be a logical vector'); -elseif numel(relativePath) ~= numel(name) - error('relativePath must have one element for each element in name (%.f), but relativePath currently has %.f elements.', numel(name), numel(relativePath)); + absolutePath = obj.absolutePath( row(first) ); +end +if ~isvector(absolutePath) || ~islogical(absolutePath) || numel(absolutePath)~=numel(name) + error('absolutePath must be a logical vector'); +elseif numel(absolutePath) ~= numel(name) + error('absolutePath must have one element for each element in name (%.f), but absolutePath currently has %.f elements.', numel(name), numel(absolutePath)); end % Get the full new file paths and check the files exist @@ -107,7 +106,7 @@ obj.checkSourcesMatchGrid( sources, s ); % Implement the desired filepath style. 
Record the new field length - newname(f) = obj.sourceFilepath(newname(f), relativePath(f)); + newname(f) = obj.sourceFilepath(newname(f), absolutePath(f)); newLength(f) = numel( char(newname(f)) ); end @@ -129,6 +128,7 @@ s = find(fileSources(:,f)); obj.source.file(s,:) = newname(f,:); obj.fieldLength(s,k) = newLength(f); + obj.absolutePath(s) = absolutePath(f); end obj.save; diff --git a/@gridfile/repeatedLoad.m b/@gridfile/repeatedLoad.m index 293fedea..c2f2514a 100644 --- a/@gridfile/repeatedLoad.m +++ b/@gridfile/repeatedLoad.m @@ -88,41 +88,46 @@ limit = obj.dimLimit(d,:,useSource(s)); dimIndices = limit(1):limit(2); - % Get the indices of the requested data relative to the source grid - % and the output grid + % Get the indices of requested data in the source grid. Note that + % some .grid dimensions may not be in the data source. [ismem, sourceDim] = ismember(obj.dims(d), source.mergedDims); if ismem - [~, loc] = ismember( indices{d}, dimIndices ); + [~, loc] = ismember(indices{d}, dimIndices); sourceIndices{sourceDim} = loc(loc~=0); [~, outputIndices{d}] = ismember( dimIndices(sourceIndices{sourceDim}), indices{d} ); + else + [~, outputIndices{d}] = ismember( dimIndices, indices{d} ); end end - % Load the data from the data source + % Load the data from the data source. Match .grid dimension order Xsource = source.read( sourceIndices ); - - % Permute to match the order of the .grid dimensions. 
Add to output - dimOrder = 1:nDims; - [~, gridOrder] = ismember( obj.dims, source.mergedDims ); - gridOrder(gridOrder==0) = dimOrder(~ismember(dimOrder,gridOrder)); - X(outputIndices{:}) = permute(Xsource, gridOrder); + [~, order] = ismember(source.mergedDims, obj.dims); + X(outputIndices{:}) = dash.permuteToOrder(Xsource, order, nDims); end % Permute to match the requested dimension order dimOrder = 1:nDims; -inputOrder = [dimOrder(inputOrder), dimOrder(~ismember(dimOrder,inputOrder))]; -X = permute(X, inputOrder); -dims = obj.dims(inputOrder); -isdefined = obj.isdefined(inputOrder); +dimOrder = [dimOrder(inputOrder), dimOrder(~ismember(dimOrder,inputOrder))]; +X = permute(X, dimOrder); +dims = obj.dims(dimOrder); + +% Determine which dimensions to keep and which to remove. Keep dimensions +% that are defined or in the user input +inputDims = 1:numel(inputOrder); +isdefined = find(obj.isdefined(dimOrder)); +notdefined = find(~obj.isdefined(dimOrder)); +keep = [inputDims, isdefined(~ismember(isdefined, inputDims))]; +remove = notdefined(~ismember(notdefined, inputDims)); +% Arrange the metadata fields to match the dimension order if isfield(meta, obj.attributesName) - inputOrder(end+1) = max(inputOrder)+1; + dimOrder(end+1) = max(dimOrder)+1; end -meta = orderfields(meta, inputOrder); +meta = orderfields(meta, dimOrder); -% Remove any undefined singleton dimensions from the data and the metadata -order = [find(isdefined), find(~isdefined)]; -X = permute(X, order); -meta = rmfield( meta, dims(~isdefined) ); +% Remove any dimensions from the data and the metadata +X = permute(X, [keep, remove]); +meta = rmfield( meta, dims(remove) ); end \ No newline at end of file diff --git a/@gridfile/review.m b/@gridfile/review.m index 6a82b19f..c19d51c0 100644 --- a/@gridfile/review.m +++ b/@gridfile/review.m @@ -1,19 +1,44 @@ function[sources] = review(obj) -%% Error checks a .grid file prior to repeated loads. 
(The motivation for -% this method is to remove duplicate error checks during repeated load -% operations.) Returns a cell array of pre-built data sources for -% subsequent repeated load operations. +%% Reviews the data sources in a gridfile prior to a repeated load +% operation. Checks that each data source can be built and returns them in +% a cell vector. % -% sources = obj.review +% sources = obj.review; % % ----- Outputs ----- % -% sources: A cell vector of pre-built dataSource objects. +% sources: A cell vector of pre-built dataSource objects for the gridfile -% Pre-build the dataSource objects. This will check the sources are still -% valid. Also check the size and data type match the .grid file record. -s = 1:size(obj.fieldLength,1); -sources = obj.buildSources(s); -obj.checkSourcesMatchGrid(sources, s); +% Preallocate the sources +nSource = size(obj.fieldLength, 1); +sources = cell(nSource, 1); + +% Attempt to build each dataSource. +for s = 1:nSource + try + sources(s) = obj.buildSources(s); + + % Return informative error message if the dataSource could not be built + catch ME + badfile = obj.collectFullPaths(s); + str = 'The data source file may have been deleted or moved'; + if ~obj.absolutePath(s) + [~, badfile, badext] = fileparts(badfile); + badfile = strcat(badfile, badext); + str = strcat(str, ' relative to the .grid file. '); + end + + % Add message to error stack and throw + message = sprintf(['dataSource number %.f in .grid file "%s" is no ', ... + 'longer valid. This data source was associated with file "%s". ', ... + '%s. 
To update file paths, see "gridfile.renameSources".'], s, obj.file, badfile, str); + cause = MException('DASH:gridfile:invalidDataSource', message); + ME = addCause(ME, cause); + rethrow(ME); + end +end + +% Check that the data sources still match the values saved in the gridfile +obj.checkSourcesMatchGrid(sources, 1:nSource); end \ No newline at end of file diff --git a/@gridfile/rewriteMetadata.m b/@gridfile/rewriteMetadata.m index d4dc4118..0ec5bb54 100644 --- a/@gridfile/rewriteMetadata.m +++ b/@gridfile/rewriteMetadata.m @@ -11,16 +11,16 @@ % meta: The new metadata for the dimension. A numeric, logical, % char, string, cellstring, or datetime matrix. Each row is treated % as the metadata for one dimension element. Each row must be unique -% and cannot contain NaN, Inf, or NaT elements. Cellstring metadata +% and cannot contain NaN or NaT elements. Cellstring metadata % will be converted into the "string" type. % Update in case the file was changed. obj.update; % Error check -dash.assertStrFlag(dim, "dim"); +dim = dash.assertStrFlag(dim, "dim"); obj.checkAllowedDims(dim); -obj.checkMetadataField(meta, dim); +meta = obj.checkMetadataField(meta, dim); % Check the new metadata is the correct size if size(meta,1) ~= size(obj.meta.(dim),1) diff --git a/@gridfile/save.m b/@gridfile/save.m index 824e276c..1a7ecf23 100644 --- a/@gridfile/save.m +++ b/@gridfile/save.m @@ -10,7 +10,8 @@ fieldLength = obj.fieldLength; maxLength = obj.maxLength; dimLimit = obj.dimLimit; -save(obj.file, '-mat', 'dims','isdefined','gridSize','metadata','source','fieldLength','maxLength','dimLimit'); +absolutePath = obj.absolutePath; +save(obj.file, '-mat', 'dims','isdefined','gridSize','metadata','source','fieldLength','maxLength','dimLimit','absolutePath'); % Update the user object obj.update; diff --git a/@gridfile/sourceFilepath.m b/@gridfile/sourceFilepath.m index 59671103..8c5e7620 100644 --- a/@gridfile/sourceFilepath.m +++ b/@gridfile/sourceFilepath.m @@ -1,21 +1,21 @@ -function[path] 
= sourceFilepath(obj, path, relative) +function[path] = sourceFilepath(obj, path, absolute) %% Converts an absolute path for a data source to the path stored by the .grid file % -% path = obj.sourceFilepath(path, relative) -% Optionally converts a data source file path to a path relative to the -% .grid file. Implements UNIX style file separators. +% path = obj.sourceFilepath(path, absolute) +% Converts a data source file path to the path style requested for a .grid +% file. Implements UNIX style file separators. % % ----- Inputs ----- % % path: A filepath. A string. % -% relative: A scalar logical. +% absolute: A scalar logical. Whether to use an absolute path (true) or not. % % ----- Outputs ----- % -% path: The file path stored in the .grid file. +% path: The file path for the .grid file. -if relative +if ~absolute path = dash.relativePath( path, obj.file ); end path = dash.unixStylePath(path); diff --git a/@gridfile/update.m b/@gridfile/update.m index fe764320..36189e2d 100644 --- a/@gridfile/update.m +++ b/@gridfile/update.m @@ -10,7 +10,11 @@ end % Load the data in the .grid file -m = load(obj.file, '-mat'); +try + m = load(obj.file, '-mat'); +catch + error('Could not load gridfile data from "%s". It may not be a .grid file. 
If it is a .grid file, it may have become corrupted.', obj.file); +end % Update the gridfile properties differentName = ["size", "gridSize"; "meta","metadata"]; diff --git a/@gridfile/updateMetadataField.m b/@gridfile/updateMetadataField.m index e58524dc..c4a172f8 100644 --- a/@gridfile/updateMetadataField.m +++ b/@gridfile/updateMetadataField.m @@ -10,7 +10,8 @@ % meta: The new metadata field d = strcmp(dim, obj.dims); -[obj.meta.(dim), obj.size(d)] = gridfile.processMetadata(meta); +obj.meta.(dim) = meta; +obj.size(d) = size(meta, 1); obj.isdefined(d) = true; obj.save; diff --git a/@stateVector/add.m b/@stateVector/add.m new file mode 100644 index 00000000..afa8d8f9 --- /dev/null +++ b/@stateVector/add.m @@ -0,0 +1,67 @@ +function[obj] = add(obj, varName, file, autoCouple, overlap) +%% Adds a variable to a stateVector. +% +% obj = obj.add(varName, file) +% Adds a variable to the state vector from a .grid file. +% +% obj = obj.add(varName, file, autoCouple) +% Specify whether the variable should be automatically coupled to other +% variables in the state vector. Default is true. +% +% obj = obj.add(varName, file, autoCouple, overlap) +% Specify whether ensemble members for the variable can use overlapping +% information. Default is false. +% +% ----- Inputs ----- +% +% varName: A name to identify the variable in the state vector. A string +% scalar or character row vector. Use whatever name you find meaningful, +% does not need to match the name of anything in the .grid file. Cannot +% repeat the name of a variable already in the stateVector object. +% +% file: The name of the .grid file that holds data for the variable. A +% string scalar or character row vector. +% +% autoCouple: A scalar logical indicating whether to automatically couple +% the variable to other variables in the state vector (true -- default) +% or not (false). +% +% overlap: A scalar logical indicating whether ensemble members for the +% variable can use overlapping data. 
+% +% ----- Outputs ----- +% +% obj: The updated stateVector object. + +% Default for autoCouple and overlap +if ~exist('autoCouple','var') || isempty(autoCouple) + autoCouple = true; +end +if ~exist('overlap','var') || isempty(overlap) + overlap = false; +end + +% Error check, use string internally +dash.assertScalarLogical(overlap, 'overlap'); +dash.assertScalarLogical(autoCouple, 'autoCouple'); +dash.assertStrFlag(varName, 'varName'); +varName = string(varName); + +% Check the name is a valid variable name and not a duplicate +obj.checkVariableNames(varName, [], 'varName', 'add a new variable to'); +vars = obj.variableNames; +vars(end+1) = varName; + +% Create the new variable (error checks file). +newVar = stateVectorVariable(varName, file); +obj.variables = [obj.variables; newVar]; + +% Update variable coupling and overlap +obj.overlap(end+1, 1) = overlap; +obj.auto_Couple(end+1, 1) = autoCouple; +obj.coupled(end+1, end+1) = true; +if autoCouple + obj = obj.couple( vars(obj.auto_Couple) ); +end + +end \ No newline at end of file diff --git a/@stateVector/allowOverlap.m b/@stateVector/allowOverlap.m new file mode 100644 index 00000000..817851e3 --- /dev/null +++ b/@stateVector/allowOverlap.m @@ -0,0 +1,35 @@ +function[obj] = allowOverlap(obj, varNames, overlap) +%% Specify whether ensemble members for the listed variables are allowed to +% use overlapping (but non-duplicate) information. +% +% obj = obj.allowOverlap(varNames, overlap) +% +% ----- Inputs ----- +% +% varNames: The names of variables for which to set overlap options. A +% string vector or cellstring vector. +% +% overlap: A logical indicating whether to allow ensemble members to use +% overlapping information (true) or not (false -- default). Use a scalar +% logical to specify the same option for all listed variables. Use a +% logical vector to specify different options for different variables +% listed in varNames. 
+% +% ----- Outputs ----- +% +% obj: The updated stateVector object + +% Error check, variable indices +v = obj.checkVariables(varNames); + +% Error check overlap +if ~islogical(overlap) + error('overlap must be a logical'); +elseif ~isscalar(overlap) + dash.assertVectorTypeN(overlap, [], numel(v), 'Since overlap is not a scalar, it'); +end + +% Update +obj.overlap(v) = overlap; + +end \ No newline at end of file diff --git a/@stateVector/append.m b/@stateVector/append.m new file mode 100644 index 00000000..ee4490e7 --- /dev/null +++ b/@stateVector/append.m @@ -0,0 +1,58 @@ +function[obj] = append(obj, secondVector) +%% Appends another state vector to the end of the current vector. Couples +% any auto-couple variables in the current vector with auto-couple +% variables in the second vector. +% +% obj = obj.append(secondVector) +% +% ----- Inputs ----- +% +% secondVector: A second stateVector object +% +% ----- Outputs ----- +% +% obj: A stateVector object for the concatenated state vectors. 
+ +% Error check +if ~isa(secondVector, 'stateVector') + error('secondVector must be a stateVector object'); +elseif ~isscalar(secondVector) + error('secondVector must be a scalar stateVector object.'); +end + +% Check there are no naming conflicts +title2 = secondVector.errorTitle; +if strcmp(title2, obj.defaultName) + title2 = "secondVector"; +end +obj.checkVariableNames(secondVector.variableNames, [], [], sprintf('append %s to', title2)); + +% Notify user of autocoupling +names1 = obj.variableNames; +names2 = secondVector.variableNames; +couple1 = names1(obj.auto_Couple); +couple2 = names2(secondVector.auto_Couple); +notifyAutocoupling(obj, couple1, couple2); + +% Update +new = numel(names1) + (1:numel(names2)); +obj.variables(new,1) = secondVector.variables; +obj.overlap(new,1) = secondVector.overlap; +obj.coupled(new, new) = secondVector.coupled; +obj.auto_Couple(new,1) = secondVector.auto_Couple; + +% Couple auto-coupled variables +obj = obj.couple([couple1; couple2]); + +end + +% Messages +function[] = notifyAutocoupling(obj, names1, names2) + +% Only notify if there are variables to couple and the user enabled +% notifications +if obj.verbose && numel(names1)>0 && numel(names2)>0 + fprintf('\nCoupling %s to %s.\n', dash.messageList(names1), dash.messageList(names2)); +end + +end \ No newline at end of file diff --git a/@stateVector/autoCouple.m b/@stateVector/autoCouple.m new file mode 100644 index 00000000..a27c5b9d --- /dev/null +++ b/@stateVector/autoCouple.m @@ -0,0 +1,35 @@ +function[obj] = autoCouple(obj, varNames, auto) +%% Specify whether to automatically couple variables to new variables added +% or appended to the stateVector. +% +% obj = obj.autoCouple(varNames, auto) +% +% ----- Inputs ----- +% +% varNames: A list of variable names for which auto-coupling options are +% being set. A string vector or cellstring vector. +% +% auto: True (default) indicates that a variable should be automatically +% coupled to new variables. 
False indicates that it should not. Use a +% scalar logical to specify the behavior for all listed variables. Use a +% logical vector to specify the behavior for each variable listed in +% varNames. +% +% ----- Outputs ----- +% +% obj: The updated stateVector object + +% Error check, variable indices +v = obj.checkVariables(varNames); + +% Error check auto. +if ~islogical(auto) + error('auto must be a logical'); +elseif ~isscalar(auto) + dash.assertVectorTypeN(auto, [], numel(v), 'Since auto is not a scalar, it'); +end + +% Update +obj.auto_Couple(v) = auto; + +end \ No newline at end of file diff --git a/@stateVector/buildEnsemble.m b/@stateVector/buildEnsemble.m new file mode 100644 index 00000000..6433c66b --- /dev/null +++ b/@stateVector/buildEnsemble.m @@ -0,0 +1,238 @@ +function[X] = buildEnsemble(obj, nEns, random) +%% Builds a state vector ensemble. +% +% X = obj.buildEnsemble(nEns) +% Builds a state vector ensemble with a specified number of ensemble +% members. +% +% X = obj.buildEnsemble(nEns, random) +% Specify whether to select ensemble members at random, or sequentially. +% Default is random selection. +% +% ----- Inputs ----- +% +% nEns: The number of ensemble members +% +% random: Scalar logical. If true (default), selects ensemble members at +% random. If false, selects ensemble members sequentially. +% +% ----- Outputs ----- +% +% X: The state vector ensemble. A numeric matrix. (nState x nEns) + +%% Input error checks + +% Default +if ~exist('random','var') || isempty(random) + random = true; +end + +% Error check +dash.assertScalarLogical(random); +if ~isscalar(nEns) || ~isnumeric(nEns) + error('nEns must be a numeric scalar.'); +end +dash.assertPositiveIntegers(nEns, 'nEns'); + + +%% All variables: check dimensions, gridfiles, index limits, trim +% vf: Variable index associated with file +% f: File index associated with variable +% v: Index of variable in the state vector. + +% Get the .grid files associated with each variable. 
+files = dash.collectField(obj.variables, 'file');
+files = string(files);
+
+% Find the unique gridfiles. Preallocate data sources, grids, and the limits
+% of each variable in the state vector
+[files, vf, f] = unique(files);
+nGrids = numel(files);
+nVars = numel(obj.variables);
+grids = cell(nGrids, 1);
+sources = cell(nGrids, 1);
+svLimit = zeros(nVars+1, 2);   % row 1 is a zero placeholder; it is deleted after the loop
+
+% Check that each variable has both state and ensemble dimensions
+for v = 1:nVars
+    if ~any(obj.variables(v).isState)
+        badDimensionsError(obj.variables(v).name, true);
+    elseif ~any(~obj.variables(v).isState)
+        badDimensionsError(obj.variables(v).name, false);
+    end
+
+    % Get the state vector index limits
+    svLimit(v+1, 1) = svLimit(v,2)+1;
+    svLimit(v+1, 2) = svLimit(v,2) + prod(obj.variables(v).stateSize);
+
+    % Check that all gridfiles are valid. Pre-build the data sources
+    if ismember(v, vf)
+        try
+            grids{f(v)} = gridfile(obj.variables(v).file);
+        catch ME
+            badGridfileError(obj.variables(v), ME);
+        end
+        sources{f(v)} = grids{f(v)}.review;
+    end
+    obj.variables(v).checkGrid(grids{f(v)});
+
+
+    % Trim reference indices to only allow complete means and sequences.
+    obj.variables(v) = obj.variables(v).trim;
+end
+
+% Finish the state vector limits and preallocate the ensemble
+svLimit(1,:) = [];
+nState = svLimit(end, 2);
+X = NaN(nState, nEns);
+
+
+%% Coupled variables: match metadata, select ensemble members, remove overlap, build ensembles
+% s: The index for a set of coupled variables
+% v: The indices of variables in a set
+% dims: ensemble dimensions
+% d: Iterator for dims
+% k: Iterator for v
+
+% Get the sets of coupled variables. Initialize selected and unused
+% ensemble members.
+sets = unique(obj.coupled, 'rows');
+nSets = size(sets, 1);
+obj.subMembers = cell(nSets, 1);
+obj.unused = cell(nSets, 1);
+
+% Get the ensemble dimensions associated with each set of coupled variables
+for s = 1:nSets
+    v = find(sets(s,:));
+    var1 = obj.variables(v(1));
+    dims = var1.dims(~var1.isState);
+
+    % Find metadata that is in all of the variables in the set.
+    for d = 1:numel(dims)
+        meta = var1.dimMetadata(grids{f(v(1))}, dims(d));
+        for k = 2:numel(v)
+            varMeta = obj.variables(v(k)).dimMetadata( grids{f(v(k))}, dims(d) );
+            try
+                meta = intersect(meta, varMeta, 'rows', 'stable');
+
+            % Informative errors if there is no overlap or different formats
+            catch
+                incompatibleFormatsError(obj, v(1), v(k), dims(d));
+            end
+            if isempty(meta)
+                noMatchingMetadataError(obj.variableNames(v), dims(d));
+            end
+        end
+
+        % Update the reference indices in each variable to match the metadata
+        for k = 1:numel(v)
+            obj.variables(v(k)) = obj.variables(v(k)).matchIndices(meta, grids{f(v(k))}, dims(d));
+        end
+    end
+
+    % ***Note: At this point, the reference indices in the coupled
+    % variables are in the same order. The first reference index in each
+    % variable points to the same metadata-1. The second reference index in
+    % each points to the same metadata-2, etc.
+
+    % Initialize a set of subscripted ensemble members
+    subMembers = NaN(nEns, numel(dims));
+    unused = (1:prod(obj.variables(v(1)).ensSize))';
+    nNeeded = nEns;
+    subIndexCell = cell(1, numel(dims));
+    siz = obj.variables(v(1)).ensSize(~var1.isState);   % var1 was copied before matchIndices ran; read sizes from the updated variable, like "unused" above
+
+    % Select ensemble members and optionally remove overlapping members
+    % until the ensemble is complete.
+    while nNeeded > 0
+        if nNeeded > numel(unused)
+            notEnoughMembersError(obj.variableNames(v), obj.overlap(v), nEns);
+        end
+
+        % Select members randomly or in an ordered manner. Remove values
+        % from the unused members when selected
+        if random
+            unused = unused( randperm(numel(unused)) );
+        end
+        members = unused(1:nNeeded);
+        unused(1:nNeeded) = [];
+
+        % Get the subscript indices of ensemble members
+        [subIndexCell{:}] = ind2sub(siz, members);
+        subMembers(nEns-nNeeded+1:nEns, :) = cell2mat(subIndexCell);
+
+        % Optionally remove ensemble members with overlapping data. Update
+        % the number of ensemble members needed
+        for k = 1:numel(v)
+            if ~obj.overlap(v(k))
+                subMembers = obj.variables(v(k)).removeOverlap(subMembers, dims);
+            end
+        end
+        nNeeded = nEns - size(subMembers, 1);
+    end
+
+    % Record the selected and unused ensemble members
+    obj.subMembers{s} = subMembers;
+    obj.unused{s} = unused;
+
+    % Build the ensemble for each variable
+    for k = 1:numel(v)
+        varIndices = svLimit(v(k),1) : svLimit(v(k),2);
+        X(varIndices, :) = obj.variables(v(k)).buildEnsemble( ...
+            subMembers, dims, grids{f(v(k))}, sources{f(v(k))} );
+    end
+end
+
+end
+
+% Long error messages
+function[] = badDimensionsError(name, noState)   % noState: true if state dimensions are missing, false if ensemble dimensions are missing
+type = "ensemble";
+if noState
+    type = "state";
+end
+error(['Variable "%s" has no %s dimensions. See "stateVector.design" to ',...
+    'specify %s dimensions.'], name, type, type);
+end
+function[] = badGridfileError(var, ME)   % rethrows ME with an added cause that names the variable
+message = sprintf('Could not build the gridfile object for variable %s.', var.name);
+cause = MException('DASH:stateVector:invalidGridfile', message);
+ME = addCause(ME, cause);
+rethrow(ME);
+end
+function[] = incompatibleFormatsError(obj, v1, v, dim)
+error(['Coupled variables "%s" and "%s" use different metadata formats for ', ...
+    'the "%s" dimension.'], obj.variables(v1).name, obj.variables(v).name, dim);
+end
+function[] = noMatchingMetadataError(varNames, dim)
+error(['Cannot couple variables %s because they have no common metadata ', ...
+    'along the "%s" dimension. This can occur when metadata for different ',...
+    'variables are in different formats. If this is the case, consider using ',...
+    'either the "stateVector.specifyMetadata" or "stateVector.convertMetadata" ',...
+    'method.'], dash.messageList(varNames), dim);
+end
+function[] = notEnoughMembersError(varNames, overlap, nEns)   % nEns: the requested ensemble size; local functions cannot see the caller's workspace
+if numel(varNames) == 1
+    str1 = "non-overlapping";
+    str3 = 'or allowing overlap';
+    if overlap
+        str1 = sprintf('\b');
+        str3 = sprintf('\b');
+    end
+    str2 = sprintf('variable "%s"', varNames);
+else
+    str1 = sprintf('\b');
+    str2 = sprintf('couple variables %s', dash.messageList(varNames));
+    if sum(~overlap)==1
+        str2 = strcat(str2, sprintf(' with no overlap for variable "%s"', varNames(~overlap)));   % name the variables that prohibit overlap
+    elseif sum(~overlap)>1
+        str2 = strcat(str2, sprintf(' with no overlap in variables %s', dash.messageList(varNames(~overlap))));
+    end
+    str3 = sprintf('\b');   % no extra advice; backspace removes the space before the final period
+    if any(~overlap)
+        str3 = 'or allowing overlap';
+    end
+end
+error(['Cannot find %.f %s ensemble members for %s. Consider using fewer ', ...
+    'ensemble members %s.'], nEns, str1, str2, str3);
+end
\ No newline at end of file
diff --git a/@stateVector/checkVariableNames.m b/@stateVector/checkVariableNames.m
new file mode 100644
index 00000000..88744ff0
--- /dev/null
+++ b/@stateVector/checkVariableNames.m
@@ -0,0 +1,47 @@
+function[] = checkVariableNames(obj, newNames, v, inputName, methodName)
+%% Checks that new variable names are valid and do not duplicate names
+% already in the state vector.
+%
+% obj.checkVariableNames(newNames, v, inputName, methodName)
+%
+% ----- Inputs -----
+%
+% newNames: The new variable names. A string vector or cellstring vector.
+%
+% v: The variable indices of the new names. If empty, uses end+1
+%
+% inputName: The name of the input. Used for error messages.
+%
+% methodName: The name of the action being attempted.
+
+% Default index for the new names
+if isempty(v)   % the default applies when no indices were given (e.g. when appending)
+    v = numel(obj.variables)+(1:numel(newNames));
+end
+
+% Check that the new names are valid MATLAB variable names
+if ~all(arrayfun(@isvarname, string(newNames)))   % isvarname tests a single name, so apply it to each element
+    bad = find(~arrayfun(@isvarname, string(newNames)),1);
+    str = sprintf('The value of %s (%s)', inputName, string(newNames));
+    if numel(newNames)>1
+        str = sprintf('Element %.f of %s (%s)', bad, inputName, string(newNames(bad)));
+    end
+    error(['%s is not a valid MATLAB variable name. Valid names must start ',...
+        'with a letter and may only include letters, numbers, and underscores.'], str);
+end
+
+% Combine new names with old
+names = obj.variableNames;
+names(v) = newNames;
+
+% Check for duplicates
+[uniqNames, loc] = unique(names);   % loc holds one index per distinct name
+nNames = numel(names);
+if nNames > numel(uniqNames)   % more names than distinct names means at least one repeat
+    bad = find(~ismember(1:nNames, loc), 1);
+    error(['Cannot %s %s because there would be multiple variables named "%s". ', ...
+        'If you want to change existing variable names, see "stateVector.renameVariables".'],...
+        methodName, obj.errorTitle, names(bad));
+end
+
+end
\ No newline at end of file
diff --git a/@stateVector/checkVariables.m b/@stateVector/checkVariables.m
new file mode 100644
index 00000000..8812af39
--- /dev/null
+++ b/@stateVector/checkVariables.m
@@ -0,0 +1,35 @@
+function[v, varNames] = checkVariables(obj, varNames)
+%% Returns the indices of state vector variables in the stateVector
+% variables array. Returns an error if any variables do not exist. Convert
+% variable names to string.
+%
+% v = obj.checkVariables(varNames)
+%
+% ----- Inputs -----
+%
+% varNames: The names of the variables. A string vector or cellstring vector.
+%
+% ----- Outputs -----
+%
+% v: The indices in the stateVector variables array.
+%
+% varNames: The names of variables as strings.
+
+% Option for empty
+v = [];   % returned unchanged when varNames is empty; varNames is then not converted to string
+if ~isempty(varNames)
+
+    % Check the variables are in the state vector
+    listName = sprintf('variable in %s', obj.errorTitle);
+    v = dash.checkStrsInList(varNames, obj.variableNames, 'varNames', listName);
+
+    % No duplicates
+    if numel(v) ~= numel(unique(v))
+        error('varNames cannot repeat variable names.');
+    end
+
+    % Convert to string
+    varNames = string(varNames);
+end
+
+end
\ No newline at end of file
diff --git a/@stateVector/convertMetadata.m b/@stateVector/convertMetadata.m
new file mode 100644
index 00000000..25a773d4
--- /dev/null
+++ b/@stateVector/convertMetadata.m
@@ -0,0 +1,55 @@
+function[obj] = convertMetadata(obj, varNames, dim, convertFunction, functionArgs)
+%% Specify how to convert the metadata along a dimension of variables in a
+% state vector.
+%
+% obj = obj.convertMetadata(varNames, dim, convertFunction)
+% Specifies a function to use to convert metadata along a particular
+% dimension for specified variables.
+%
+% obj = obj.convertMetadata(varNames, dim, convertFunction, functionArgs)
+% Specify additional input arguments for the metadata conversion function.
+%
+% ***Note: Metadata will be converted via
+% >> convertedMetadata = convertFunction( metadata, functionArgs{:} )
+% so functionArgs should list input arguments 2-N.
+%
+% ----- Inputs -----
+%
+% varNames: The names of variables in the state vector that should have
+%    their metadata converted. A string vector or cellstring vector.
+%
+% dim: The name of the dimension in the variables over which to apply the
+%    metadata conversion. A string scalar or character row vector.
+%
+% convertFunction: The function handle for the function being used to
+%    convert the metadata. The conversion function should convert metadata
+%    to a numeric, logical, char, string, cellstring, or datetime matrix.
+%    It must preserve the number of rows in the original metadata. Each row
+%    of the converted metadata will be used as the metadata for one element
+%    along the dimension. Converted metadata cannot contain NaN or NaT elements.
+%
+% functionArgs: A cell vector containing additional arguments that should
+%    be passed to the conversion function. Elements should be in the same
+%    order in which they should be passed to the conversion function. Note
+%    that the first input to the conversion function will be the metadata,
+%    so the first element of functionArgs will be the second input argument
+%    to the conversion function.
+%
+% ----- Outputs -----
+%
+% obj: The updated stateVector object.
+
+% Default for functionArgs
+if ~exist('functionArgs','var')
+    functionArgs = [];   % NOTE(review): documented as a cell vector; confirm the variable-level method treats [] as "no extra arguments"
+end
+
+% Error check variables, get indices
+v = obj.checkVariables(varNames);
+
+% Update each variable
+for k = 1:numel(v)
+    obj.variables(v(k)) = obj.variables(v(k)).convertMetadata(dim, convertFunction, functionArgs);
+end
+
+end
\ No newline at end of file
diff --git a/@stateVector/copy.m b/@stateVector/copy.m
new file mode 100644
index 00000000..04376a93
--- /dev/null
+++ b/@stateVector/copy.m
@@ -0,0 +1,105 @@
+function[obj] = copy(obj, templateName, varNames, varargin)
+%% Copies design options from a template variable to other variables in a
+% state vector.
+%
+% obj = obj.copy( templateName, varNames )
+% Copies all options from a template variable to other variables. This
+% includes dimension types, state indices, reference indices, sequences,
+% mean options, and weights for weighted means.
+%
+% obj = obj.copy( ..., 'sequence', copySequences )
+% Specify whether to copy sequence options.
+%
+% obj = obj.copy( ..., 'weightedMean', copyWeights )
+% Specify whether to copy weights for weighted means.
+%
+% obj = obj.copy( ..., 'mean', copyMeans )
+% Specify whether to copy mean options. Note that this does not include
+% weights for a weighted mean. Only whether a mean is being taken over a
+% dimension, mean indices, and NaN options.
+%
+% obj = obj.copy( ..., 'design', copyDesigns )
+% Specify whether to copy dimension types, state indices, and reference
+% indices.
+%
+% obj = obj.copy( ..., 'metadata', copyMetadata )
+% Specify whether to copy metadata and metadata conversion functions.
+%
+% ----- Inputs -----
+%
+% templateName: The name of a template variable. A string scalar or
+%    character row vector.
+%
+% varNames: The names of the variables to which options are being copied. A
+%    string vector or cellstring vector.
+%
+% copySequences: Scalar logical. If true (default), copies sequence options.
+%    If false, does not.
+%
+% copyMeans: Scalar logical. If true (default), copies mean options. If
+%    false, does not.
+%
+% copyWeights: Scalar logical. If true (default), copies weights for
+%    weighted means. If false, does not.
+%
+% copyDesigns: Scalar logical. If true (default), copies dimension types,
+%    state indices, and reference indices. If false, does not.
+%
+% copyMetadata: Scalar logical. If true (default), copies specified
+%    metadata and metadata conversion functions. If false, does not.
+%
+% ----- Outputs -----
+%
+% obj: The updated stateVector object
+
+% Error check template, template index, parse inputs
+t = obj.checkVariables(templateName);
+[copySequences, copyMeans, copyWeights, copyDesigns, copyMetadata] = dash.parseInputs( varargin, ...
+    ["sequence","mean","weightedMean","design","metadata"], {true, true, true, true, true}, 2 );
+
+% Error check
+dash.assertScalarLogical(copySequences, 'copySequences');
+dash.assertScalarLogical(copyMeans, 'copyMeans');
+dash.assertScalarLogical(copyWeights, 'copyWeights');
+dash.assertScalarLogical(copyDesigns, 'copyDesigns');
+dash.assertScalarLogical(copyMetadata, 'copyMetadata');
+
+% Get the template variable
+var = obj.variables(t);
+
+% Designs
+if copyDesigns
+    obj = obj.design(varNames, var.dims, var.isState, var.indices);
+end
+
+% Sequences
+if copySequences
+    ens = ~var.isState;   % sequence options are copied for the ensemble dimensions only
+    obj = obj.sequence(varNames, var.dims(ens), var.seqIndices(ens), var.seqMetadata(ens));
+end
+
+% Means
+if copyMeans
+    m = var.takeMean;   % only dimensions flagged by takeMean are passed on
+    obj = obj.mean(varNames, var.dims(m), var.mean_Indices(m), var.omitnan(m));
+end
+
+% Weighted means
+if copyWeights
+    w = var.hasWeights;   % only dimensions flagged by hasWeights are passed on
+    obj = obj.weightedMean(varNames, var.dims(w), var.weightCell(w));
+end
+
+% Metadata
+if copyMetadata
+    d = find(var.hasMetadata);   % dimensions with user-specified metadata
+    for k = 1:numel(d)
+        obj = obj.specifyMetadata(varNames, var.dims(d(k)), var.metadata{d(k)});
+    end
+    d = find(var.convert);   % dimensions with a metadata conversion function
+    for k = 1:numel(d)
+        obj = obj.convertMetadata(varNames, var.dims(d(k)), var.convertFunction{d(k)}, var.convertArgs{d(k)} );
+    end
+end
+
+end
\ No newline at end of file
diff --git a/@stateVector/couple.m b/@stateVector/couple.m
new file mode 100644
index 00000000..2a4db2e3
--- /dev/null
+++ b/@stateVector/couple.m
@@ -0,0 +1,68 @@
+function[obj] = couple(obj, varNames)
+%% Couples variables in a state vector. Adjusts dimensions of coupled
+% variables to match the state and ensemble dimensions of the first listed
+% variable.
+%
+% obj = obj.couple(varNames)
+%
+% ----- Inputs -----
+%
+% varNames: A list of variable names to be coupled. A string vector or
+%    cellstring vector.
+%
+% ----- Outputs -----
+%
+% obj: The updated stateVector object
+
+% Glossary of indices
+% uv: User variables
+% v: All variables being coupled
+% sv: Secondary variables being coupled (those not listed by the user)
+% t: Template variable (first user variable)
+
+% Error check. Get the indices of user variables and template variable.
+uv = obj.checkVariables(varNames);
+if isempty(uv), return; end   % nothing to couple (e.g. append with no auto-coupled variables); also guards uv(1)
+t = uv(1);
+% Find all variables coupled to those listed. Note any secondary coupled
+% variables.
+[~, col] = find(obj.coupled(uv,:));
+v = unique(col);
+sv = v(~ismember(v, uv));
+notifySecondaryVariables(obj, uv, sv, t);
+
+% Update the coupled variables to match the template
+obj = obj.updateCoupledVariables(t, v);
+
+% Couple the variables to one another
+for k = 1:numel(v)
+    obj.coupled(v, v(k)) = true;
+    obj.coupled(v(k), v) = true;
+end
+
+end
+
+% Message
+function[] = notifySecondaryVariables(obj, uv, sv, t)
+
+% No message if no secondary variable, or user disabled messages
+if ~isempty(sv) && obj.verbose
+
+    % Plural vs singular variables
+    plural = ["s", "are"];
+    if numel(sv)==1
+        plural = ["", "is"];
+    end
+
+    % Format variable names as string
+    names = obj.variableNames;
+    template = names(t);
+    input = dash.messageList( names(uv) );
+    secondary = dash.messageList( names(sv) );
+
+    % Message
+    fprintf(['\nVariable%s %s %s coupled to %s. Thus, %s will also be ', ...
+        'coupled to "%s".\n'], plural(1), secondary, plural(2), input, secondary, template);
+end
+
+end
\ No newline at end of file
diff --git a/@stateVector/design.m b/@stateVector/design.m
new file mode 100644
index 00000000..97646cf1
--- /dev/null
+++ b/@stateVector/design.m
@@ -0,0 +1,99 @@
+function[obj] = design(obj, varNames, dims, type, indices)
+%% Designs a dimension of a stateVectorVariable
+%
+% obj = obj.design(varNames, dim, type)
+% obj = obj.design(varNames, dim, isState)
+% Specifies a dimension as a state dimension or ensemble dimension. Uses
+% all elements along the dimension as state indices or ensemble reference
+% indices, as appropriate.
+%
+% obj = obj.design(varNames, dim, 's'/'state'/true, stateIndices)
+% Specify state indices for a dimension.
+%
+% obj = obj.design(varNames, dim, 'e'/'ens'/'ensemble'/false, refIndices)
+% Specify reference indices for an ensemble dimension.
+%
+% obj = obj.design(varNames, dims, isState/type, indexCell)
+% Specify dimension type and indices for multiple dimensions.
+%
+% ----- Inputs -----
+%
+% varNames: The names of the variables over which to design dimensions. A
+%    string vector or cellstring vector.
+%
+% dim(s): The name(s) of the variable's dimensions. A string vector,
+%    cellstring vector, or character row vector.
+%
+% type: Options are ("state" or "s") to indicate a state dimension, and
+%    ("ensemble" / "ens" / "e") to indicate an ensemble dimension. Use a
+%    string scalar to specify the same type for all dimensions listed in
+%    dims. Use a string vector to specify different options for the
+%    different dimensions listed in dims.
+%
+% isState: True indicates that a dimension is a state dimension. False
+%    indicates an ensemble dimension. Use a scalar logical to use the same
+%    type for all dimensions listed in dims. Use a logical vector to
+%    specify different options for the different dimensions listed in dims.
+%
+% stateIndices: The indices of required data along the dimension in the
+%    variable's .grid file. Either a vector of linear indices or a logical
+%    vector the length of the dimension.
+%
+% refIndices: The ensemble reference indices. Either a vector of linear
+%    indices or a logical vector the length of the dimension.
+%
+% indexCell: A cell vector. Each element contains the state indices or
+%    ensemble reference indices for a dimension listed in dims, as
+%    appropriate. Must be in the same order as dims. If an element is an
+%    empty array, uses all indices along the dimension.
+
+% Default for indices
+if ~exist('indices','var')
+    indices = [];
+end
+
+% Error check, variable index
+v = obj.checkVariables(varNames);
+
+% Update each variable
+for k = 1:numel(v)   % the range is fixed here, so variables appended to v below are not revisited
+    obj.variables(v(k)) = obj.variables(v(k)).design(dims, type, indices);
+
+    % Find coupled variables not specified by the user. Notify that these
+    % will also be updated
+    sv = find(obj.coupled(v(k),:));
+    sv = sv(~ismember(sv, v));
+    notifySecondaryVariables(obj, sv, v(k));   % report the variable they are actually matched to below
+
+    % Update these variables. Add them to the variable list to prevent
+    % redundant updates / notifications
+    obj = obj.updateCoupledVariables(v(k), sv);
+    v = [v(:); sv(:)];
+end
+
+end
+
+% Message
+function[] = notifySecondaryVariables(obj, sv, t)   % sv: secondary variable indices; t: index of the template variable
+
+% No message if no secondary variables, or user disabled messages
+if ~isempty(sv) && obj.verbose
+
+    % Plural vs singular
+    plural = ["s", "are", "them"];
+    if numel(sv)==1
+        plural = ["", "is", "it"];
+    end
+
+    % Format variable names as string
+    names = obj.variableNames;
+    template = names(t);
+    secondary = dash.messageList( names(sv) );
+
+    % Message
+    fprintf(['\nVariable%s %s %s coupled to "%s". Updating %s to match the ',...
+        'ensemble dimensions of %s.\n'], plural(1), secondary, plural(2), ...
+        template, plural(3), template);
+end
+
+end
\ No newline at end of file
diff --git a/@stateVector/dimensions.m b/@stateVector/dimensions.m
new file mode 100644
index 00000000..f59a6153
--- /dev/null
+++ b/@stateVector/dimensions.m
@@ -0,0 +1,74 @@
+function[dims] = dimensions(obj, varNames, type)
+%% Displays the names of dimensions in specified variables
+%
+% obj.dimensions
+% Prints a list of the dimensions in each variable to the console.
+%
+% obj.dimensions(varNames)
+% Prints a list of dimensions for the specified variables to the console.
+%
+% obj.dimensions(varNames, type)
+% Prints a list of state dimensions, ensemble dimensions, or all
+% dimensions, as requested.
+%
+% [dims] = obj.dimensions(...)
+% Returns the lists of dimensions as a structure array. Does not print to
+% console.
+%
+% ----- Inputs -----
+%
+% varNames: The names of the variables for which to return dimension names.
+%    A string vector or cellstring vector.
+%
+% type: A string scalar or character row vector indicating which dimensions
+%    to return.
+%    'state' / 's': Return state dimensions
+%    'ensemble' / 'ens' / 'e': Return ensemble dimensions
+%    'all' / 'a': Return all dimensions
+%
+% ----- Outputs -----
+%
+% dims: A structure array (one element per variable) of dimension names.
+
+% Default, error check variable names
+if ~exist('varNames','var') || isempty(varNames)
+    v = 1:numel(obj.variables);
+else
+    v = obj.checkVariables(varNames);
+end
+nVars = numel(v);
+
+% Default, error check type
+flags = ["state","s","ensemble","ens","e","all","a"];
+if ~exist('type','var') || isempty(type)
+    type = 'all';
+elseif ~any(strcmpi(type, flags))
+    error('type must be one of the following strings: %s', dash.messageList(flags));
+end
+type = lower(type);   % flags were matched case-insensitively; normalize before the comparisons below
+
+% Console output
+if nargout==0 && nVars>0
+    fprintf('\n');
+    for k = 1:nVars
+        obj.variables(v(k)).dimensions(type);
+    end
+    fprintf('\n');
+
+% Preallocate structure output
+elseif nargout~=0
+    field = "dimensions";   % the output field name depends on the requested type
+    if ismember(type, ["state","s"])
+        field = "stateDimensions";
+    elseif ismember(type, ["ensemble","ens","e"])
+        field = "ensembleDimensions";
+    end
+    dims = repmat( struct(field, []), [nVars, 1]);
+
+    % Build output structure
+    for k = 1:nVars
+        dims(k).(field) = obj.variables(v(k)).dimensions(type);
+    end
+end
+
+end
\ No newline at end of file
diff --git a/@stateVector/errorTitle.m b/@stateVector/errorTitle.m
new file mode 100644
index 00000000..e7356b0e
--- /dev/null
+++ b/@stateVector/errorTitle.m
@@ -0,0 +1,18 @@
+function[str] = errorTitle(obj)
+% Returns a string identifying the stateVector object for use in error
+% messages.
+%
+% str = obj.errorTitle
+% Returns the string for the error message.
+%
+% ----- Outputs -----
+%
+% str: An identifying string for error messages.
+
+if obj.name == ""
+    str = obj.defaultName;
+else
+    str = sprintf('the stateVector "%s"', obj.name);
+end
+
+end
\ No newline at end of file
diff --git a/@stateVector/extract.m b/@stateVector/extract.m
new file mode 100644
index 00000000..b118a567
--- /dev/null
+++ b/@stateVector/extract.m
@@ -0,0 +1,26 @@
+function[obj] = extract(obj, varNames)
+%% Reduces a state vector to the listed variables; all others are removed
+%
+% obj = obj.extract(varNames)
+%
+% ----- Inputs -----
+%
+% varNames: The names of the variables to keep in the state vector. A
+%    string vector or cellstring vector.
+%
+% ----- Outputs -----
+%
+% obj: The updated stateVector object
+
+% Validate the requested names and locate them in the variables array
+keep = obj.checkVariables(varNames);
+
+% Flag every variable that was not requested
+nVars = numel(obj.variables);
+isDropped = ~ismember(1:nVars, keep);
+
+% Delegate the deletion to remove
+allNames = obj.variableNames;
+obj = obj.remove( allNames(isDropped) );
+
+end
\ No newline at end of file
diff --git a/@stateVector/info.m b/@stateVector/info.m
new file mode 100644
index 00000000..1d656da1
--- /dev/null
+++ b/@stateVector/info.m
@@ -0,0 +1,108 @@
+function[vectorInfo, varInfo] = info(obj, varNames)
+%% Returns information about a state vector and its variables
+%
+% obj.info
+% Prints information about the state vector to console
+%
+% obj.info(v)
+% obj.info(varNames)
+% Prints information about specific variables in the state vector.
+%
+% [vectorInfo, varInfo] = obj.info( ... )
+% Returns state vector information as a structure and information about
+% specified variables as a structure array. Does not print to console.
+%
+% ----- Inputs -----
+%
+% varNames: A list of names of variables in the state vector. A string
+%    vector or cellstring vector.
+%
+% v: The indices of variables in the state vector. Either a vector of
+%    linear indices or a logical vector with one element for each variable.
+%
+% ----- Outputs -----
+%
+% vectorInfo: A structure containing summary information about the state
+%    vector.
+%
+% varInfo: A structure array containing information on the specified
+%    variables in the state vector.
+
+% Default for unset variables
+if ~exist('varNames','var') || isempty(varNames)
+    varNames = [];
+end
+
+% Error check inputs
+if dash.isstrlist(varNames)
+    v = obj.checkVariables(varNames);
+elseif isnumeric(varNames) || islogical(varNames)
+    lengthName = 'the number of variables in the state vector';
+    v = dash.checkIndices(varNames, 'v', numel(obj.variables), lengthName);
+else
+    error('The first input must either be list of variable names or indices.');
+end
+
+% Summary information
+name = obj.name;
+title = char(obj.errorTitle);   % as char, so title(1) addresses the first character even if a string scalar is returned
+title(1) = 'T';
+nVars = numel(obj.variables);
+vars = obj.variableNames;
+nState = 0;
+for k = 1:nVars
+    nState = nState + prod(obj.variables(k).stateSize);
+end
+sets = unique(obj.coupled, 'rows');
+sets(sum(sets,2)==1, :) = [];   % rows with a single true entry are variables coupled only to themselves
+nSets = size(sets,1);
+coupledNames = cell(nSets,1);
+for s = 1:nSets
+    coupledNames{s} = obj.variableNames(sets(s,:));
+end
+
+% Structure output
+if nargout > 0
+    vectorInfo = struct('name', name, 'nVariables', nVars, 'variables', vars, ...
+        'nState', nState);
+    vectorInfo.coupledVariables = coupledNames;
+
+    % Preallocate the variable structure
+    nVars = numel(v);   % from here on, nVars counts the requested variables only
+    varInfo = dash.preallocateStructs(stateVectorVariable.infoFields, [nVars, 1]);
+
+% Print to console
+else
+    fprintf('\n%s has a length of %.f.\n', title, nState);
+    plural = "variables";
+    if nVars==1
+        plural = "variable";
+    end
+    fprintf('It has %.f %s: %s\n', nVars, plural, dash.messageList(vars));
+
+    % Coupled variables
+    if nSets>0
+        plural = ["are", "sets"];
+        if nSets == 1
+            plural = ["is", "set"];
+        end
+        fprintf('There %s %.f %s of coupled variables\n', plural(1), nSets, plural(2));
+        for s = 1:nSets
+            fprintf('\tVariables %s are coupled.\n', dash.messageList(coupledNames{s}));
+        end
+    end
+    fprintf('\n');
+end
+
+% Variable information
+for k = 1:numel(v)
+    if nargout>0
+        [varInfo(k), dimInfo] = obj.variables(v(k)).info;
+        varInfo(k).dimensions = dimInfo;
+    else
+        obj.variables(v(k)).info;
+        fprintf('\n');
+    end
+end
+
+end
\ No newline at end of file
diff --git a/@stateVector/mean.m b/@stateVector/mean.m
new file mode 100644
index 00000000..aa666dd8
--- /dev/null
+++ b/@stateVector/mean.m
@@ -0,0 +1,75 @@
+function[obj] = mean(obj, varNames, dims, indices, omitnan)
+%% Specifies options for taking a mean over dimensions for specified variables.
+%
+% obj = obj.mean(varNames, stateDim)
+% obj = obj.mean(varNames, stateDim, []);
+% Take a mean over a state dimension.
+%
+% obj = obj.mean(varNames, ensDim, indices);
+% Specify how to take a mean over an ensemble dimension.
+%
+% obj = obj.mean(varNames, dims, indexCell)
+% Specify how to take a mean over multiple dimensions.
+%
+% obj = obj.mean(varNames, dims, indices, nanflag)
+% obj = obj.mean(varNames, dims, indices, omitnan)
+% Specify how to treat NaN values when taking a mean
+%
+% ----- Inputs -----
+%
+% varNames: The names of the variables over which to take a mean. A string
+%    vector or cellstring vector.
+%
+% stateDim: The name of a state dimension for the variable. A string.
+%
+% ensDim: The name of an ensemble dimension for the variable. A string.
+%
+% dims: The names of multiple dimensions. A string vector or cellstring
+%    vector. May not repeat dimension names.
+%
+% indices: Mean indices for an ensemble dimension. A vector of integers
+%    that indicates the position of mean data-elements relative to the
+%    sequence data-elements. 0 indicates a sequence data-element. 1 is the
+%    data-element following a sequence data-element. -1 is the data-element
+%    before a sequence data-element, etc. Mean indices may be in any order
+%    and cannot have a magnitude larger than the length of the dimension in
+%    the .grid file.
+%
+% indexCell: A cell vector. Each element contains mean indices for one
+%    dimension listed in dims. Must be in the same order as dims. Use an
+%    empty array for elements corresponding to state dimensions.
+%
+% nanflag: Options are "includenan" to use NaN values (default) and
+%    "omitnan" to remove NaN values. Use a string scalar to specify an
+%    option for all dimensions listed in dims. Use a string vector to
+%    specify different options for the different dimensions listed in dims.
+%
+% omitnan: If false (default) includes NaN values in a mean. If true,
+%    removes NaN values. Use a scalar logical to use the same option for
+%    all dimensions listed in dims. Use a logical vector to specify
+%    different options for the different dimensions listed in dims.
+%
+% ----- Outputs -----
+%
+% obj: The updated stateVector object.
+
+% Defaults for unset variables
+if ~exist('dims','var')
+    dims = [];   % empty placeholders are forwarded unchanged to each variable's mean method
+end
+if ~exist('indices','var')
+    indices = [];
+end
+if ~exist('omitnan','var')
+    omitnan = [];
+end
+
+% Error check. Variable indices
+v = obj.checkVariables(varNames);
+
+% Update variables
+for k = 1:numel(v)
+    obj.variables(v(k)) = obj.variables(v(k)).mean(dims, indices, omitnan);   % the same options are applied to every listed variable
+end
+
+end
\ No newline at end of file
diff --git a/@stateVector/notifyConsole.m b/@stateVector/notifyConsole.m
new file mode 100644
index 00000000..b1702ceb
--- /dev/null
+++ b/@stateVector/notifyConsole.m
@@ -0,0 +1,19 @@
+function[obj] = notifyConsole(obj, verbose)
+%% Specify whether the stateVector should print notifications to the console
+%
+% obj = obj.notifyConsole( verbose )
+%
+% ----- Input -----
+%
+% verbose: A scalar logical. If true (default), the stateVector prints
+%    messages to the console. If false, it does not.
+%
+% ----- Outputs -----
+%
+% obj: The updated stateVector object
+
+% Error check. Set toggle
+dash.assertScalarLogical(verbose, 'verbose');
+obj.verbose = verbose;
+
+end
\ No newline at end of file
diff --git a/@stateVector/remove.m b/@stateVector/remove.m
new file mode 100644
index 00000000..01999dfe
--- /dev/null
+++ b/@stateVector/remove.m
@@ -0,0 +1,28 @@
+function[obj] = remove(obj, varNames)
+%% Removes specified variables from a state vector
+%
+% obj = obj.remove(varNames)
+%
+% ----- Inputs -----
+%
+% varNames: The names of the variables that should be removed. A string
+%    vector or cellstring vector.
+% +% ----- Outputs ----- +% +% obj: The updated stateVector object + +% Error check, variable index +v = obj.checkVariables(varNames); + +% Remove each variable from the array +v = sort(v); +for k = numel(v):-1:1 + obj.variables(v(k)) = []; + obj.coupled(v(k), :) = []; + obj.coupled(:, v(k)) = []; + obj.auto_Couple(v(k)) = []; + obj.overlap(v(k)) = []; +end + +end \ No newline at end of file diff --git a/@stateVector/rename.m b/@stateVector/rename.m new file mode 100644 index 00000000..cc47918c --- /dev/null +++ b/@stateVector/rename.m @@ -0,0 +1,20 @@ +function[obj] = rename(obj, name) +%% Changes the identifying name of the state vector +% +% obj = obj.rename(name) +% Changes the vector's name. +% +% ----- Input ----- +% +% name: The new name for the state vector. A string scalar or character row +% vector. +% +% ----- Outputs ----- +% +% obj: The updated stateVector object + +% Error check. Change name. Use string internally. +dash.assertStrFlag(name, 'name'); +obj.name = string(name); + +end \ No newline at end of file diff --git a/@stateVector/renameVariables.m b/@stateVector/renameVariables.m new file mode 100644 index 00000000..9916910c --- /dev/null +++ b/@stateVector/renameVariables.m @@ -0,0 +1,35 @@ +function[obj] = renameVariables(obj, varNames, newNames) +%% Changes the names of specified variables +% +% obj = obj.renameVariables(varNames, newNames) +% +% ----- Inputs ----- +% +% varNames: The current names of the variables being renamed. A string +% vector or cellstring vector. +% +% newNames: The new names of the variables. A string vector or cellstring +% vector with one element for each variable in varNames. Must be in the +% same order as varNames. 
+% +% ----- Outputs ----- +% +% obj: The updated stateVector object + +% Error check, variable names +v = obj.checkVariables(varNames); + +% Error check the new names +dash.assertStrList(newNames, 'newNames'); +newNames = string(newNames); +dash.assertVectorTypeN(newNames, [], numel(v), 'newNames'); + +% Check there are no naming conflicts and names are valid +obj.checkVariableNames(newNames, v, 'newNames', 'rename variables for'); + +% Rename +for k = 1:numel(v) + obj.variables(v(k)) = obj.variables(v(k)).rename( newNames(k) ); +end + +end \ No newline at end of file diff --git a/@stateVector/resetMeans.m b/@stateVector/resetMeans.m new file mode 100644 index 00000000..41efedb1 --- /dev/null +++ b/@stateVector/resetMeans.m @@ -0,0 +1,42 @@ +function[obj] = resetMeans(obj, varNames, dims) +% Resets options for means for specified variables and dimensions. +% +% obj = obj.resetMeans +% Resets mean options for all variables and dimensions +% +% obj = obj.resetMeans(varNames) +% Resets mean options in all dimensions for specified variables. +% +% obj = obj.resetMeans(varNames, dims) +% Resets mean options for the specified dimensions. +% +% ----- Inputs ----- +% +% varNames: The names of variables for which mean options should be reset. +% A string vector or cellstring vector. +% +% dims: The names of dimensions in which to reset mean options. A string +% vector or cellstring vector. +% +% ----- Outputs ----- +% +% obj: The updated stateVector object. + +% Default and error check variables. 
Get indices +if ~exist('varNames','var') || isempty(varNames) + v = 1:numel(obj.variables); +else + v = obj.checkVariables(varNames); +end + +% Default for dims +if ~exist('dims','var') || isempty(dims) + dims = []; +end + +% Update each variable +for k = 1:numel(v) + obj.variables(v(k)) = obj.variables(v(k)).resetMeans(dims); +end + +end \ No newline at end of file diff --git a/@stateVector/resetMetadata.m b/@stateVector/resetMetadata.m new file mode 100644 index 00000000..69be9c45 --- /dev/null +++ b/@stateVector/resetMetadata.m @@ -0,0 +1,42 @@ +function[obj] = resetMetadata(obj, varNames, dims) +% Resets metadata options for specified variables and dimensions. +% +% obj = obj.resetMetadata +% Resets metadata options for all variables and dimensions +% +% obj = obj.resetMetadata(varNames) +% Resets metadata options in all dimensions for specified variables. +% +% obj = obj.resetMetadata(varNames, dims) +% Resets metadata options for the specified dimensions. +% +% ----- Inputs ----- +% +% varNames: The names of variables for which metadata options should be reset. +% A string vector or cellstring vector. +% +% dims: The names of dimensions in which to reset metadata options. A string +% vector or cellstring vector. +% +% ----- Outputs ----- +% +% obj: The updated stateVector object. + +% Default and error check variables. 
Get indices +if ~exist('varNames','var') || isempty(varNames) + v = 1:numel(obj.variables); +else + v = obj.checkVariables(varNames); +end + +% Default for dims +if ~exist('dims','var') || isempty(dims) + dims = []; +end + +% Update each variable +for k = 1:numel(v) + obj.variables(v(k)) = obj.variables(v(k)).resetMetadata(dims); +end + +end \ No newline at end of file diff --git a/@stateVector/sequence.m b/@stateVector/sequence.m new file mode 100644 index 00000000..42d191aa --- /dev/null +++ b/@stateVector/sequence.m @@ -0,0 +1,52 @@ +function[obj] = sequence(obj, varNames, dims, indices, metadata) +%% Use a sequence of data for ensemble dimensions in specified variables. +% +% obj = obj.sequence(varNames, dim, indices, metadata) +% Designs a sequence for an ensemble dimension and specifies sequence +% metadata. +% +% obj = obj.sequence(varNames, dims, indexCell, metadataCell) +% Designs a sequence and specifies metadata for multiple ensemble +% dimensions. +% +% ----- Inputs ----- +% +% varNames: The names of variables in the state vector for which to use a +% sequence. A string vector or cellstring vector. +% +% dim(s): The name(s) of ensemble dimension(s) in the .grid file for the +% variables. A string vector, cellstring vector, or character row vector. +% +% dims: The names of multiple ensemble dimensions. A string vector or +% cellstring vector. May not repeat dimension names. +% +% indices: The sequence indices. A vector of integers that indicates the +% position of sequence data-elements relative to the reference indices. +% 0 indicates the reference index. 1 is the data index following the +% reference index. -1 is the data index before the reference index, etc. +% Sequence indices may be in any order and cannot have a magnitude +% larger than the length of the dimension in the .grid file. +% +% indexCell: A cell vector. Each element contains the sequence indices for +% one dimension listed in dims. Must be in the same dimension order as +% dims. 
+% +% metadata: Metadata for the sequence. Either a vector with one element per +% sequence index or an array with one row per sequence index. +% +% metadataCell: A cell vector. Each element contains the metadata for one +% dimension listed in dims. Must be in the same dimension order as dims. +% +% ----- Output ----- +% +% obj: The updated stateVector object. + +% Error check, variable index +v = obj.checkVariables(varNames); + +% Update each variable +for k = 1:numel(v) + obj.variables(v(k)) = obj.variables(v(k)).sequence(dims, indices, metadata); +end + +end \ No newline at end of file diff --git a/@stateVector/specifyMetadata.m b/@stateVector/specifyMetadata.m new file mode 100644 index 00000000..b502e429 --- /dev/null +++ b/@stateVector/specifyMetadata.m @@ -0,0 +1,32 @@ +function[obj] = specifyMetadata(obj, varNames, dim, metadata) +%% Specify metadata at the ensemble reference indices along a dimension of +% variables in a state vector. +% +% obj = obj.specifyMetadata(varNames, dim, metadata) +% +% ----- Inputs ----- +% +% varNames: The names of the variables over which to specify metadata. A +% string vector or cellstring vector. +% +% dim: The name of the dimension for which metadata is provided. A string +% +% metadata: Metadata at the reference indices for an ensemble dimension. +% Metadata may be numeric, logical, char, string, cellstring, or +% datetime matrix. Must have one row per reference index. Each row must +% be unique and cannot contain NaN or NaT elements. Cellstring +% metadata will be converted into the "string" type. 
+% +% ----- Outputs ----- +% +% obj: The updated stateVector object + +% Error check, variable index +v = obj.checkVariables(varNames); + +% Update the variables +for k = 1:numel(v) + obj.variables(v(k)) = obj.variables(v(k)).specifyMetadata(dim, metadata); +end + +end \ No newline at end of file diff --git a/@stateVector/stateVector.m b/@stateVector/stateVector.m new file mode 100644 index 00000000..2477c660 --- /dev/null +++ b/@stateVector/stateVector.m @@ -0,0 +1,147 @@ +classdef stateVector + % A class that designs a state vector and builds an ensemble. + % + % stateVector Methods: + % stateVector - Initializes a new state vector. + % add - Adds a new variable to a state vector. + % design - Options for dimensions of variables in a state vector. + % mean - Options for taking a mean + % weightedMean - Options for taking a weighted mean + % sequence - Options for designing a sequence. + % copy - Copy options between variables + % buildEnsemble - Builds a state vector ensemble + % + % *** Summary Information *** + % stateVector Methods: + % variableNames - Returns a list of variables in the state vector + % dimensions - Returns a list of dimensions for variables + % info - Return a summary of a state vector + % + % *** Advanced *** + % stateVector Methods: + % autoCouple - Specify whether to automatically couple variables + % uncouple - Uncouple variables in a state vector + % couple - Couples variables + % allowOverlap - Enable overlap for variables in a state vector + % specifyMetadata - Specify metadata for variable dimensions + % convertMetadata - Convert metadata for variable dimensions + % + % *** Additional Options *** + % stateVector Methods: + % notifyConsole - Enable or disable console notifications + % rename - Change the name of a state vector + % renameVariables - Change the names of variables + % append - Concatenate state vectors + % extract - Get the state vector for specific variables + % remove - Removes a variable from a state vector + % 
resetMeans - Reset options for means + % resetMetadata - Reset options for metadata + + % ----- Written By ----- + % Jonathan King, University of Arizona, 2019-2020 + + properties (SetAccess = private) + name; % An optional identifier for the state vector + verbose; % Whether to print messages to the console + + variables; % The array of variable designs + overlap; % Whether variable ensemble members can use overlapping, non-duplicate information + coupled; % Which variables are coupled + auto_Couple; % Whether to automatically couple a variable to new variables + + subMembers; % Subscripted ensemble members for each set of coupled variables + unused; % Unselected ensemble members for each set of coupled variables + end + + properties (Hidden, Constant) + defaultName = 'this stateVector'; + end + + % Constructor + methods + function obj = stateVector( name, verbose ) + % Creates a new state vector design. + % + % obj = stateVector; + % Initializes a new stateVector object. + % + % obj = stateVector(name) + % Includes an identifying name. + % + % obj = stateVector(name, verbose) + % Specify whether to print various messages to console. Default + % is true. + % + % ----- Inputs ----- + % + % name: An optional title for the state vector. A string scalar + % or character row vector. + % + % verbose: A scalar logical indicating whether to print various + % messages to the console (true -- default) or not (false). + % + % ----- Outputs ----- + % + % obj: A new, empty stateVector object. + + % Defaults + if ~exist('name','var') || isempty(name) + name = ""; + end + if ~exist('verbose','var') || isempty(verbose) + verbose = true; + end + + % Save name. 
Set console output + obj = obj.rename(name); + obj = obj.notifyConsole(verbose); + + % Initialize + obj.overlap = false(0,1); + obj.auto_Couple = false(0,1); + obj.coupled = false(0,0); + obj.variables = []; + end + end + + % Object utilities + methods + [v, varNames] = checkVariables(obj, varNames); + checkVariableNames(obj, newNames, v, inputName, methodName); + str = errorTitle(obj); + obj = updateCoupledVariables(obj, t, v); + end + + % User interface methods with stateVectorVariable + methods + obj = design(obj, varNames, dims, type, indices); + obj = sequence(obj, varNames, dims, indices, metadata); + obj = mean(obj, varNames, dims, indices, omitnan); + obj = weightedMean(obj, varNames, dims, weights); + obj = resetMeans(obj, varNames, dims); + obj = specifyMetadata(obj, varNames, dim, metadata); + obj = convertMetadata(obj, varNames, dim, convertFunction, functionArgs); + obj = resetMetadata(obj, varNames, dims); + dims = dimensions(obj, varNames, type); + [vectorInfo, varInfo] = info(obj, vars); + obj = renameVariables(obj, varNames, newNames); + X = buildEnsemble(obj, nEns, random); + end + + % User methods + methods + obj = rename(obj, name); + obj = notifyConsole(obj, verbose); + obj = add(obj, name, file, autoCouple); + obj = remove(obj, varNames); + obj = couple(obj, varNames); + obj = uncouple(obj, varNames); + obj = autoCouple(obj, varNames, auto); + obj = allowOverlap(obj, varNames, overlap); + obj = append(obj, secondVector); + obj = extract(obj, varNames); + obj = copy(obj, templateName, varNames, varargin); + varNames = variableNames(obj, v); + end + +end \ No newline at end of file diff --git a/@stateVector/uncouple.m b/@stateVector/uncouple.m new file mode 100644 index 00000000..2bd3aed9 --- /dev/null +++ b/@stateVector/uncouple.m @@ -0,0 +1,25 @@ +function[obj] = uncouple(obj, varNames) +%% Uncouples the specified variables within a state vector. 
+% +% obj = obj.uncouple(varNames) +% +% ----- Inputs ----- +% +% varNames: The names of the variables that should be uncoupled. A string +% vector or cellstring vector. +% +% ----- Outputs ----- +% +% obj: The updated stateVector object + +% Error check, variable index +v = obj.checkVariables(varNames); + +% Uncouple the variables, but keep each variable coupled with itself. +for k = 1:numel(v) + obj.coupled(v, v(k)) = false; + obj.coupled(v(k), v) = false; + obj.coupled(v(k), v(k)) = true; +end + +end diff --git a/@stateVector/updateCoupledVariables.m b/@stateVector/updateCoupledVariables.m new file mode 100644 index 00000000..bd7d5654 --- /dev/null +++ b/@stateVector/updateCoupledVariables.m @@ -0,0 +1,78 @@ +function[obj] = updateCoupledVariables(obj, t, v) +%% Updates variables to match the ensemble dimensions of a template variable +% +% obj = obj.updateCoupledVariables(t, v) +% +% ----- Inputs ----- +% +% t: The index of the template variable +% +% v: The indices of variables being updated to match the template +% +% ----- Outputs ----- +% +% obj: The updated stateVector object + +% Get the ensemble dimensions of the template +ensDims = obj.variables(t).dims( ~obj.variables(t).isState ); + +% Check each variable has the required dimensions +for k = 1:numel(v) + var = obj.variables(v(k)); + missing = ~ismember(ensDims, var.dims); + if any(missing) + missingDimsError(missing, ensDims, var); + end + + % Check if any state dimensions need to be converted to ensemble, and + % vice versa. Notify user of changes. + varStateDims = var.dims(var.isState); + toEns = varStateDims( ismember(varStateDims, ensDims) ); + varEnsDims = var.dims(~var.isState); + toState = varEnsDims( ~ismember(varEnsDims, ensDims) ); + + % Change the dimension types. Notify user of changes. 
Save + var = var.design(toEns, 'ensemble'); + var = var.design(toState, 'state'); + notifyChangedDimensions(obj, v(k), t, toEns, toState); + obj.variables(v(k)) = var; +end + +end + +% Notification message +function[] = missingDimsError(missing, ensDims, var) +bad = find(missing,1); +error(['Cannot update coupled variable "%s" because "%s" does not have a ',... + '"%s" dimension.'], var.name, var.name, ensDims(bad)); +end +function[] = notifyChangedDimensions(obj, v, t, toEns, toState) + +% Only notify if dimensions are changing and user has not disabled messages +if obj.verbose && (numel(toEns)>0 || numel(toState)>0) + template = obj.variables(t).name; + thisVar = obj.variables(v).name; + fprintf('\nCoupling variable "%s" to "%s":\n', thisVar, template); + + % Converting to ensemble. + nEns = numel(toEns); + if nEns>0 + plural = ["state dimensions", "ensemble dimensions"]; + if nEns == 1 + plural = ["a state dimension", "an ensemble dimension"]; + end + fprintf('\tConverting %s from %s to %s.\n', dash.messageList(toEns), plural(1), plural(2)); + end + + % Converting to state + nState = numel(toState); + if nState>0 + plural = ["ensemble dimensions", "state dimensions"]; + if nState == 1 + plural = ["an ensemble dimension", "a state dimension"]; + end + fprintf('\tConverting %s from %s to %s.\n', dash.messageList(toState), plural(1), plural(2)); + end +end + +end \ No newline at end of file diff --git a/@stateVector/variableNames.m b/@stateVector/variableNames.m new file mode 100644 index 00000000..51e72bb4 --- /dev/null +++ b/@stateVector/variableNames.m @@ -0,0 +1,39 @@ +function[varNames] = variableNames(obj, v) +% Returns the names of the variables in a stateVector object. +% +% varNames = obj.variableNames; +% Returns the names of all variables in the state vector. +% +% varNames = obj.variableNames(v); +% Returns the names of the specified variables +% +% ----- Inputs ----- +% +% v: The indices of specific variables within the state vector. 
A vector +% of positive integers that do not exceed the number of variables OR a +% logical vector with one element per variable. +% +% ----- Outputs ----- +% +% varNames: A string vector of variable names. + +% Default and error check for v +nVars = numel(obj.variables); +if ~exist('v','var') || isempty(v) + v = 1:nVars; +elseif islogical(v) || isnumeric(v) + v = dash.checkIndices(v, 'v', nVars, 'the number of variables'); +else + error('v must either be a logical or numeric vector.'); +end + +% Default for empty state vector +varNames = strings(0,1); + +% Get the variable names +if ~isempty(obj.variables) + varNames = dash.collectField(obj.variables(v), 'name'); + varNames = string(varNames); +end + +end \ No newline at end of file diff --git a/@stateVector/weightedMean.m b/@stateVector/weightedMean.m new file mode 100644 index 00000000..03656674 --- /dev/null +++ b/@stateVector/weightedMean.m @@ -0,0 +1,49 @@ +function[obj] = weightedMean(obj, varNames, dims, weights) +%% Specify options for taking a weighted mean over dimensions of specified variables. +% +% obj = obj.weightedMean(varNames, dim, weights) +% Takes a weighted mean over a dimension. +% +% obj = obj.weightedMean(varNames, dims, weightCell) +% obj = obj.weightedMean(varNames, dims, weightArray) +% Takes a weighted mean over multiple dimensions. +% +% ----- Inputs ----- +% +% varNames: The names of the variables over which to take a mean. A string +% vector or cellstring vector. +% +% dim(s): The name(s) of dimension(s) over which to take a weighted mean. +% A string vector, cellstring vector, or character row vector. +% +% weights: A numeric vector containing the mean weights. If dim is a state +% dimension, must have a length equal to the number of state indices. +% If dim is an ensemble dimension, the length must be equal to the +% number of mean indices. (See stateVector.info to summarize dimension +% properties). May not contain NaN, Inf, or complex numbers. +% +% weightCell: A cell vector. 
Each element contains mean weights for one +% dimension listed in dims. Must be in the same order as dims. +% +% weightArray: An N-dimensional numeric array containing weights for taking +% a mean across specified dimensions. Must have a dimension for each +% dimension listed in dims and must have the same dimension order as +% dims. The length of each dimension of weightArray must be equal to +% either the number of state indices or mean indices, as appropriate. +% (See the "weights" input for details). May not contain NaN, Inf, or +% complex numbers. If an element of weightCell is an empty array, uses +% equal weights for elements along the associated dimension. +% +% ----- Outputs ----- +% +% obj: The updated stateVector object + +% Error check, variable index +v = obj.checkVariables(varNames); + +% Update each variable +for k = 1:numel(v) + obj.variables(v(k)) = obj.variables(v(k)).weightedMean(dims, weights); +end + +end \ No newline at end of file diff --git a/@stateVectorVariable/addIndices.m b/@stateVectorVariable/addIndices.m new file mode 100644 index 00000000..53506137 --- /dev/null +++ b/@stateVectorVariable/addIndices.m @@ -0,0 +1,25 @@ +function[addIndex] = addIndices(obj, d) +%% Returns the add indices for a dimension. These are the mean indices +% propagated over the sequence elements +% +% addIndices = obj.addIndices(d) +% +% ----- Inputs ----- +% +% d: The index of an ensemble dimension +% +% ----- Outputs ----- +% +% addIndices: The add indices for the dimension as a column vector + +% Get the mean indices +meanIndices = obj.mean_Indices{d}; +if isempty(meanIndices) + meanIndices = 0; +end + +% Propagate over the sequence indices. 
Convert to column +addIndex = meanIndices + obj.seqIndices{d}'; +addIndex = addIndex(:); + +end \ No newline at end of file diff --git a/@stateVectorVariable/assertAddIndices.m b/@stateVectorVariable/assertAddIndices.m new file mode 100644 index 00000000..d394ced2 --- /dev/null +++ b/@stateVectorVariable/assertAddIndices.m @@ -0,0 +1,21 @@ +function[] = assertAddIndices(obj, indices, d, name) +%% Checks that an input is a valid set of sequence or mean indices. +% +% obj.assertAddIndices(indices, d, name) +% +% ----- Inputs ----- +% +% indices: The sequence or mean indices being checked +% +% d: Dimension index for the indices +% +% name: The name of the input. A string. + +dash.assertVectorTypeN(indices, 'numeric', [], 'indices'); +if any(mod(indices,1)~=0) + error('%s must be a vector of integers.', name); +elseif any(abs(indices)>obj.gridSize(d)) + error('%s cannot have values with a magnitude larger than %.f (the length of the %s dimension in variable %s).', name, obj.gridSize(d), obj.dims(d), obj.name); +end + +end \ No newline at end of file diff --git a/@stateVectorVariable/buildEnsemble.m b/@stateVectorVariable/buildEnsemble.m new file mode 100644 index 00000000..8d129f20 --- /dev/null +++ b/@stateVectorVariable/buildEnsemble.m @@ -0,0 +1,109 @@ +function[X] = buildEnsemble(obj, subMembers, dims, grid, sources) +%% Builds an ensemble for the stateVectorVariable +% +% X = obj.buildEnsemble(subMembers, dims, grid, sources) +% +% ----- Inputs ----- +% +% subMembers: A matrix of subscripted ensemble member indices. Each row +% holds the subscripted indices for one ensemble member. Each column is +% the indices for an ensemble dimension. +% +% dims: The names of ensemble dimensions in the order that they appear in the +% columns of subMembers. A string vector or cellstring vector. +% +% sources: An array for data sources being called in a gridfile repeated +% load. See gridfile.review +% +% ----- Outputs ----- +% +% X: The ensemble for the variable. A numeric matrix. 
(nState x nEns) + +%% Means + +% Get nanflag and fill unspecified meanSize +nDims = numel(obj.dims); +nanflag = repmat("includenan", [1 nDims]); +nanflag(obj.omitnan) = "omitnan"; + +% Track the size and location of dimensions for taking means +obj.meanSize(isnan(obj.meanSize)) = 1; +siz = obj.stateSize .* obj.meanSize; +meanDims = 1:nDims; + +% Get the weights for each dimension with a mean +for k = 1:nDims + if obj.takeMean(k) + if isempty(obj.weightCell{k}) + obj.weightCell{k} = ones(obj.meanSize(k), 1); + end + + % Permute for singleton expansion + order = 1:max(2, meanDims(k)); + order(meanDims(k)) = 1; + order(1) = meanDims(k); + obj.weightCell{k} = permute(obj.weightCell{k}, order); + end +end + +%% Ensemble dimensions: indices and sequences + +% Initialize load indices with state indices. +indices = cell(1, nDims); +indices(obj.isState) = obj.indices(obj.isState); + +% Propagate mean indices over sequences to get add indices +d = obj.checkDimensions(dims); +addIndices = cell(1, numel(d)); +for k = 1:numel(d) + addIndices{k} = obj.addIndices(d(k)); + + % Note if size or mean dimensions change for sequences + if obj.stateSize(d(k))>1 + siz = [siz(1:d(k)-1), obj.meanSize(d(k)), obj.stateSize(d(k)), siz(d(k)+1:end)]; + meanDims(d(k)+1:end) = meanDims(d(k)+1:end)+1; + end +end + +%% Load individual ensemble members + +% Create the gridfile and preallocate the ensemble +nEns = size(subMembers, 1); +X = NaN( prod(obj.stateSize), nEns ); + +% Get load indices for each ensemble member +for m = 1:nEns + for k = 1:numel(d) + indices{d(k)} = obj.indices{d(k)}(subMembers(m,k)) + addIndices{k}; + end + + % Load the data. 
Reshape sequences + [Xm, ~, sources] = grid.repeatedLoad(1:nDims, indices, sources); + Xm = reshape(Xm, siz); + + % If taking a mean over a dimension, get the weights + for k = 1:nDims + if obj.takeMean(k) + w = obj.weightCell{k}; + + % If omitting NaN, propagate weights over matrix and infill NaN + if obj.omitnan(k) + nanIndex = isnan(Xm); + if any(nanIndex, 'all') + wSize = siz; + wSize( meanDims(find(obj.takeMean(1:k))) ) = 1; %#ok + w = repmat(w, wSize); + w(nanIndex) = NaN; + end + end + + % Take the mean + Xm = sum(w.*Xm, meanDims(k), nanflag(k)) ./ sum(w, meanDims(k), nanflag(k)); + end + end + + % Add the vector to the ensemble + X(:,m) = Xm(:); +end + +end \ No newline at end of file diff --git a/@stateVectorVariable/checkDimensions.m b/@stateVectorVariable/checkDimensions.m new file mode 100644 index 00000000..ae7f3d0a --- /dev/null +++ b/@stateVectorVariable/checkDimensions.m @@ -0,0 +1,49 @@ +function[d, dims] = checkDimensions(obj, dims, allowMultiple) +%% Returns the indices of dimensions in a state vector variable. Returns an +% error if any dimensions do not exist. Does not allow duplicate names. +% Also returns the dimension names as strings +% +% [d, dims] = obj.checkDimensions(dims) +% +% [d, dims] = obj.checkDimensions(dims, allowMultiple) +% +% ----- Inputs ----- +% +% dims: The input being checked as a list of dimension names. +% +% allowMultiple: A scalar logical indicating whether to allow multiple +% dimensions as input. Default is true +% +% ----- Outputs ----- +% +% d: The indices in the stateVectorVariable dims array +% +% dims: The dimension names as strings + +% Default and error check for allowMultiple +if ~exist('allowMultiple','var') || isempty(allowMultiple) + allowMultiple = true; +end + +% Option for empty dims +d = []; +if ~isempty(dims) + + % Check the dimensions are in the variable and get their index. 
+ % Optionally check for a single input + listName = sprintf('dimension in the .grid file for the %s variable', obj.name); + d = dash.checkStrsInList(dims, obj.dims, 'dims', listName); + if ~allowMultiple && numel(d)>1 + error('dim can only list one dimension. It should be a string scalar or character row vector.'); + end + + % No duplicates + if numel(d) ~= numel(unique(d)) + error('dims cannot repeat dimension names.'); + end + + % Convert dims to string + dims = string(dims); +end + +end \ No newline at end of file diff --git a/@stateVectorVariable/checkGrid.m b/@stateVectorVariable/checkGrid.m new file mode 100644 index 00000000..734c3cd3 --- /dev/null +++ b/@stateVectorVariable/checkGrid.m @@ -0,0 +1,29 @@ +function[] = checkGrid(obj, grid) +%% Checks that a gridfile object matches the dimensions and size of the +% original grid associated with a stateVectorVariable +% +% obj.checkGrid(grid) +% +% ----- Inputs ----- +% +% grid: A gridfile object for the variable + +if ~isequal(obj.dims, grid.dims) + gridChangedError(obj, true); +elseif ~isequal(obj.gridSize, grid.size) + gridChangedError(obj, false); +end + +end + +% Long error message +function[] = gridChangedError(var, isDims) +tail = sprintf(['changed since variable "%s" was added to the state vector. ', ... + 'You may want to remove "%s" from the state vector and rebuild it.'], ... + var.name, var.name); +if isDims + error('The dimensions in .grid file "%s" have %s', var.file, tail); +else + error('The size of .grid file "%s" has %s', var.file, tail); +end +end \ No newline at end of file diff --git a/@stateVectorVariable/convertMetadata.m b/@stateVectorVariable/convertMetadata.m new file mode 100644 index 00000000..1c2e5bbd --- /dev/null +++ b/@stateVectorVariable/convertMetadata.m @@ -0,0 +1,69 @@ +function[obj] = convertMetadata(obj, dim, convertFunction, functionArgs) +%% Specify how to convert the metadata along a dimension of variables in a +% state vector. 
+% +% obj = obj.convertMetadata(dim, convertFunction) +% Specifies a function to use to convert metadata along a particular +% dimension. +% +% obj = obj.convertMetadata(dim, convertFunction, functionArgs) +% Specify additional input arguments for the metadata conversion function. +% +% ***Note: Metadata will be converted via +% >> convertedMetadata = convertFunction( metadata, functionArgs{:} ) +% so functionArgs should list input arguments 2-N. +% +% ----- Inputs ----- +% +% dim: The name of the dimension in the variables over which to apply the +% metadata conversion. A string scalar or character row vector. +% +% convertFunction: The function handle for the function being used to +% convert the metadata. The conversion function should convert metadata +% to a numeric, logical, char, string, cellstring, or datetime matrix. +% It must preserve the number of rows in the original metadata. Each row +% of the converted metadata will be used as the metadata for one element +% along the dimension. Converted metadata cannot contain NaN or NaT elements. +% +% functionArgs: A cell vector containing additional arguments that should +% be passed to the conversion function. Elements should be in the same +% order in which they should be passed to the conversion function. Note +% that the first input to the conversion function will be the metadata, +% so the first element of functionArgs will be the second input argument +% to the conversion function. +% +% ----- Outputs ----- +% +% obj: The updated stateVectorVariable object. + +% Error check, dimension index. Cannot conflict with previous metadata +d = obj.checkDimensions(dim, false); +if any(obj.hasMetadata(d)) + previousMetadataError(obj, d); +end + +% Error check function. Default and error check args. 
+if ~isa(convertFunction, 'function_handle') || ~isscalar(convertFunction) + error('convertFunction must be a scalar function handle.'); +end +if ~exist('functionArgs','var') || isempty(functionArgs) + functionArgs = {}; +else + dash.assertVectorTypeN(functionArgs, 'cell', [], 'functionArgs'); +end + +% Update +obj.convert(d) = true; +obj.convertFunction{d} = convertFunction; +obj.convertArgs{d} = functionArgs; + +end + +% Error message +function[] = previousMetadataError(obj, d) +bad = d(find(obj.convert(d),1)); +error('Cannot specify a metadata conversion function for the "%s" dimension ',... + 'of variable "%s" because you previously specified metadata for the ',... + 'reference indices of this dimension. You may want to reset the metadata ',... + 'options using "stateVector.resetMetadata".', obj.dims(bad), obj.name); +end \ No newline at end of file diff --git a/@stateVectorVariable/design.m b/@stateVectorVariable/design.m new file mode 100644 index 00000000..5957e949 --- /dev/null +++ b/@stateVectorVariable/design.m @@ -0,0 +1,152 @@ +function[obj] = design(obj, dims, type, indices) +%% Designs a dimension of a stateVectorVariable +% +% obj = obj.design(dim, type) +% obj = obj.design(dim, isState) +% Specifies a dimension as a state dimension or ensemble dimension. Uses +% all elements along the dimension as state indices or ensemble reference +% indices, as appropriate. +% +% obj = obj.design(dim, 's'/'state'/true, stateIndices) +% Specify state indices for a dimension. +% +% obj = obj.design(dim, 'e'/'ens'/'ensemble'/false, refIndices) +% Specify reference indices for an ensemble dimension. +% +% obj = obj.design(dims, isState/type, indexCell) +% Specify dimension type and indices for multiple dimensions. +% +% ----- Inputs ----- +% +% dim: The name of one of the variable's dimensions. A string. +% +% dims: The names of multiple dimensions. A string vector or cellstring +% vector. May not repeat dimension names. 
+%
+% type: Options are ("state" or "s") to indicate a state dimension, and
+%    ("ensemble" / "ens" / "e") to indicate an ensemble dimension. Use a
+%    string scalar to specify the same type for all dimensions listed in
+%    dims. Use a string vector to specify different options for the
+%    different dimensions listed in dims.
+%
+% isState: True indicates that a dimension is a state dimension. False
+%    indicates an ensemble dimension. Use a scalar logical to use the same
+%    type for all dimensions listed in dims. Use a logical vector to
+%    specify different options for the different dimensions listed in dims.
+%
+% stateIndices: The indices of required data along the dimension in the
+%    variable's .grid file. Either a vector of linear indices or a logical
+%    vector the length of the dimension.
+%
+% refIndices: The ensemble reference indices. Either a vector of linear
+%    indices or a logical vector the length of the dimension.
+%
+% indexCell: A cell vector. Each element contains the state indices or
+%    ensemble reference indices for a dimension listed in dims, as
+%    appropriate. Must be in the same order as dims. If an element is an
+%    empty array, uses all indices along the dimension.
+
+% Error check, dimension index
+[d, dims] = obj.checkDimensions(dims);
+nDims = numel(d);
+
+% Parse, error check the dimension type. Save
+isState = obj.parseLogicalString(type, nDims, 'isState', 'type', ...
+    ["state","s","ensemble","ens","e"], 2, 'The dimension type'); % first 2 strings map to true (state)
+obj.isState(d) = isState;
+
+% Parse indices. Get the input name used in index error messages
+if ~exist('indices','var') || isempty(indices)
+    indices = cell(1, nDims);
+end
+[indices, wasCell] = obj.parseInputCell(indices, nDims, 'indexCell');
+name = 'stateIndices';
+if ~wasCell && ~isState
+    name = 'refIndices'; % matches the input name documented in the help text above
+end
+
+% Use all indices if unspecified.
Error check
+for k = 1:nDims
+    if isempty(indices{k})
+        indices{k} = (1:obj.gridSize(d(k)))';
+    end
+    if wasCell
+        name = sprintf('Element %.f of indexCell', k);
+    end
+    lengthName = sprintf('the length of the %s dimension', obj.dims(d(k)));
+    indices{k} = dash.checkIndices(indices{k}, name, obj.gridSize(d(k)), lengthName);
+
+    % State dimension
+    if obj.isState(d(k))
+        obj.stateSize(d(k)) = numel(indices{k});
+        obj.ensSize(d(k)) = 1;
+        obj.indices{d(k)} = indices{k}(:);
+
+        % Reset ensemble properties
+        obj.seqIndices{d(k)} = [];
+        obj.seqMetadata{d(k)} = [];
+
+        obj.hasMetadata(d(k)) = false;
+        obj.metadata{d(k)} = [];
+        obj.convert(d(k)) = false;
+        obj.convertFunction{d(k)} = [];
+        obj.convertArgs{d(k)} = [];
+
+        % Update mean properties
+        obj.mean_Indices{d(k)} = [];
+        if obj.takeMean(d(k))
+            if obj.hasWeights(d(k)) && obj.meanSize(d(k))~=obj.stateSize(d(k))
+                weightsNumberError(obj, dims(k), obj.stateSize(d(k)), obj.meanSize(d(k)));
+            end
+            obj.meanSize(d(k)) = obj.stateSize(d(k)); % only this dimension; indexing with d would overwrite every listed dimension
+            obj.stateSize(d(k)) = 1; % a mean collapses the dimension to a single state element
+        end
+
+    % Ensemble dimension
+    else
+        obj.indices{d(k)} = indices{k}(:);
+        obj.stateSize(d(k)) = 1;
+        obj.ensSize(d(k)) = numel(indices{k});
+
+        % Check for a conflict with metadata
+        if obj.hasMetadata(d(k)) && size(obj.metadata{d(k)},1)~=obj.ensSize(d(k))
+            metadataConflictError(obj, d(k)); % helper brace-indexes obj.metadata{d}, so it needs a scalar index
+        end
+
+        % If converting from state, initialize sequence properties and
+        % check for meanIndices conflict
+        if isempty(obj.seqIndices{d(k)})
+            obj.seqIndices{d(k)} = 0;
+            obj.seqMetadata{d(k)} = NaN;
+            if obj.takeMean(d(k))
+                ensMeanError(obj, dims(k)); % dims is ordered like d, so index it with k (not d(k))
+            end
+        end
+    end
+end
+
+end
+
+% Long error messages
+function[] = weightsNumberError( obj, dim, nIndex, nWeights )
+error(['Cannot convert the "%s" dimension of variable "%s" to a state ',...
+    'dimension because %s is being used in a weighted mean, and the number ',...
+    'of state indices (%.f) does not match the number of mean weights ',...
+    '(%.f). Either use %.f state indices or reset the mean options using ',...
+ '"stateVector.resetMeans".'], dim, obj.name, dim, nIndex, nWeights, nWeights); +end +function[] = metadataConflictError(obj, d) +oldSize = size(obj.metadata{d}, 1); +newSize = obj.ensSize(d); +error(['The new number of reference indices (%.f) for dimension "%s" of ',... + 'variable "%s" does not match the number of rows in the specified ', ... + 'metadata (%.f). Either use %.f indices or reset the metadata options ',... + 'using "stateVector.resetMetadata".'], newSize, obj.dims(d), obj.name, ... + oldSize, oldSize); +end +function[] = ensMeanError(obj, dim) +error(['Cannot convert dimension "%s" of variable "%s" to an ensemble ',... + 'dimension because it is being used in a mean and there are no mean ',... + 'indices. You may want to reset the mean options using ',... + '"stateVector.resetMeans".'], dim, obj.name); +end \ No newline at end of file diff --git a/@stateVectorVariable/dimMetadata.m b/@stateVectorVariable/dimMetadata.m new file mode 100644 index 00000000..cc8c45c6 --- /dev/null +++ b/@stateVectorVariable/dimMetadata.m @@ -0,0 +1,74 @@ +function[meta] = dimMetadata(obj, grid, dim) +%% Gets the metadata along a dimension at the variable's indices along the +% dimension. +% +% meta = obj.dimMetadata(grid, dim) +% +% ----- Inputs ----- +% +% grid: The gridfile object associated with the variable +% +% dim: The name of the dimension. 
A string
+%
+% ----- Outputs -----
+%
+% meta: The metadata along the dimension
+
+% Load any metadata provided directly
+d = obj.checkDimensions(dim);
+if obj.hasMetadata(d)
+    meta = obj.metadata{d};
+
+% Otherwise, determine from indices and gridfile metadata
+else
+    meta = grid.meta.(dim)(obj.indices{d}, :);
+
+    % Setup if converting metadata
+    if obj.convert(d)
+        nRows = size(meta, 1);
+        funcInfo = functions(obj.convertFunction{d});
+        funcName = funcInfo.function;
+
+        % Run the conversion function
+        try
+            meta = obj.convertFunction{d}(meta, obj.convertArgs{d}{:});
+        catch ME
+            failedConversionError(obj.name, dim, funcName, 1, ME);
+        end
+
+        % Check that converted metadata is still valid. Convert cellstring to
+        % string.
+        try
+            meta = gridfile.checkMetadataField(meta);
+        catch ME
+            failedConversionError(obj.name, dim, funcName, 2, ME);
+        end
+        if size(meta, 1) ~= nRows % no exception was caught on this path, so build the base exception here
+            failedConversionError(obj.name, dim, funcName, 3, MException('DASH:stateVector:convertedMetadataRows', 'The metadata conversion function did not preserve the number of metadata rows.'), size(meta,1), nRows);
+        end
+    end
+end
+
+end
+
+% Error messages. type: 1 = conversion function threw, 2 = invalid converted metadata, 3 = row count changed
+function[] = failedConversionError(var, dim, func, type, ME, newRows, oldRows)
+head = sprintf(['Could not convert metadata for the "%s" dimension of ',...
+    'variable "%s" using the "%s" function'], dim, var, func);
+if type == 1
+    reason = sprintf(['because "%s" threw an error when given the "%s" metadata ', ...
+        '(and any additional specified arguments) as input.'], func, dim);
+elseif type == 2
+    reason = ['because the converted metadata was not valid gridfile metadata. ', ...
+        'Converted metadata must be a numeric, logical, char, string, cellstring, or ',...
+        'datetime matrix. Each row must be unique and cannot contain NaN or NaT elements.'];
+elseif type == 3
+    reason = sprintf(['because the number of rows in the converted metadata ',...
+        '(%.f) does not match the number of rows in the original metadata (%.f).'], ...
+ newRows, oldRows); +end +message = sprintf('%s %s', head, reason); +cause = MException('DASH:stateVector:failedMetadataConversion', message); +ME = addCause(ME, cause); +rethrow(ME); +end \ No newline at end of file diff --git a/@stateVectorVariable/dimensions.m b/@stateVectorVariable/dimensions.m new file mode 100644 index 00000000..59ab65b0 --- /dev/null +++ b/@stateVectorVariable/dimensions.m @@ -0,0 +1,42 @@ +function[dims] = dimensions(obj, type) +%% Displays the dimensions of a variable. +% +% obj.dimensions(type) +% Prints a list of state dimensions, ensemble dimensions, or all +% dimensions, as requested. +% +% [dims] = obj.dimensions(type) +% Returns the dimensions as a string vector. Does not print to console. +% +% ----- Inputs ----- +% +% type: A string scalar or character row vector indicating which dimensions +% to return. +% 'state' / 's': Return state dimensions +% 'ensemble' / 'ens' / 'e': Return ensemble dimensions +% 'all' / 'a': Return all dimensions +% +% ----- Outputs ----- +% +% dims: A string vector containing the names of the requested dimensions + +% Only use non-singleton grid dimensions +dims = obj.dims(obj.gridSize>1); +isState = obj.isState(obj.gridSize>1); + +% Get the dims and string for different dimensions +str = "D"; +if ismember(type, ["state","s"]) + dims = dims(isState); + str = "State d"; +elseif ismember(type, ["ensemble","ens","e"]) + dims = dims(~isState); + str = "Ensemble d"; +end + +% Console output +if nargout==0 + fprintf('%simensions for variable "%s": %s\n', str, obj.name, dash.messageList(dims)); +end + +end \ No newline at end of file diff --git a/@stateVectorVariable/info.m b/@stateVectorVariable/info.m new file mode 100644 index 00000000..79d708d7 --- /dev/null +++ b/@stateVectorVariable/info.m @@ -0,0 +1,173 @@ +function[varInfo, dimInfo] = info(obj) +%% Returns information about a stateVectorVariable +% +% obj.info +% Prints information about the state vector variable to the console. 
+% +% [varInfo, dimInfo] = obj.info +% Returns summary variable information as a structure, and dimension info +% as a structure array. Does not print to console. +% +% ----- Outputs ----- +% +% varInfo: A structure containing information on the state vector variable +% +% dimInfo: A structure containing information on the dimensions of the +% state vector variable + +% Variable summary +name = obj.name; +file = obj.file; +stateSize = prod(obj.stateSize); +obj = obj.trim; +ensSize = prod(obj.ensSize); +singleDims = obj.dims(obj.gridSize==1); +dims = obj.dims(obj.gridSize~=1); +stateDims = obj.dims(obj.isState & obj.gridSize>1); +ensDims = obj.dims(~obj.isState & obj.gridSize>1); + +% Output structure +if nargout~=0 + input = cell(1, numel(obj.infoFields)*2); + input(1:2:end) = obj.infoFields; + input(2:2:end) = {name, file, stateSize, ensSize, dims, stateDims, ensDims, singleDims}; + varInfo = struct( input{:} ); + + % Preallocate dimension structure + nDims = numel(dims); + dimFields = {"name","type","stateLength","ensembleMembers","indices",... + "sequence","hasMean","meanIndices","weights"}; + [dimInfo, dimInputs] = dash.preallocateStructs(dimFields, [nDims, 1]); + +% Print to console +else + fprintf('"%s" is a state vector variable.\n', obj.name); + fprintf('Data for %s is organized by .grid file "%s"\n', obj.name, obj.file); + statePlural = "s"; + if stateSize == 1 + statePlural = ""; + end + ensPlural = ["are", "s"]; + if ensSize==1 + ensPlural = ["is",""]; + end + fprintf('The state vector for %s is %.f element%s long. There %s %.f possible ensemble member%s.\n', ... 
+ name, stateSize, statePlural, ensPlural(1), ensSize, ensPlural(2)); +end + +% Cycle through state dimensions first +sd = find(obj.isState & obj.gridSize>1); +ed = find(~obj.isState & obj.gridSize>1); +alldims = [sd, ed]; + +% Dimension info +for k = 1:numel(alldims) + d = alldims(k); + name = obj.dims(d); + indices = obj.indices{d}; + hasMean = obj.takeMean(d); + weights = NaN; + if obj.hasWeights(d) + weights = obj.weightCell{d}; + end + + % State specific + if obj.isState(d) + type = "state"; + stateSize = obj.stateSize(d); + ensSize = NaN; + sequence = NaN; + meanIndices = NaN; + + % Ensemble specific + else + type = "ensemble"; + stateSize = obj.stateSize(d); + ensSize = obj.ensSize(d); + sequence = obj.seqIndices{d}; + meanIndices = obj.mean_Indices{d}; + end + + % Output structure + if nargout>0 + dimInputs(2:2:end) = {name, type, stateSize, ensSize, indices, sequence, ... + hasMean, meanIndices, weights}; + dimInfo(k) = struct(dimInputs{:}); + + % Print to console + else + % State dimension header + if strcmp(type,'state') + if d==sd(1) + fprintf('\n\tSTATE DIMENSIONS: %s\n', dash.messageList(stateDims)); + end + fprintf('\t%s has a length of %.f in the state vector.\n', name, stateSize); + + + % String for mean + if stateSize>1 || (hasMean && obj.meanSize(d)>1) + if obj.meanSize(d)==1 || ~hasMean + meanStr = sprintf('\b'); + elseif obj.hasWeights(d) + meanStr = sprintf('a weighted mean of %.f data elements', obj.meanSize(d)); + elseif hasMean + meanStr = sprintf('a mean of %.f data elements', obj.meanSize(d)); + end + + % String for spacing + spaceStr = ''; + spacing = unique(diff(sort(indices))); + if numel(spacing)==1 && spacing==1 + spaceStr = sprintf('spaced in steps of 1 data index'); + elseif numel(spacing)==1 + spaceStr = sprintf('spaced in steps of %.f data indices', spacing); + end + + % Final string + if numel(spacing)==1 || (hasMean && obj.meanSize(d)>1) + fprintf('\t\tIt is %s %s\n', meanStr, spaceStr); + end + end + + % Ensemble dimension 
header + else + if d==ed(1) + fprintf('\n\tENSEMBLE DIMENSIONS: %s\n', dash.messageList(ensDims)); + end + fprintf('\t%s has %.f elements that can be used in an ensemble.\n', name, numel(indices)); + + % Sequence information + if ~isequal(sequence, 0) + strs = ["a sequence of data elements:", "data indices"]; + if numel(sequence)==1 + strs(1) = "the data element"; + end + if isequal(abs(sequence), 1) + strs(2) = "index"; + end + list = dash.messageList(sequence); + fprintf('\t\tIt has a length of %.f in the state vector.\n', stateSize); + fprintf('\t\tIt uses %s %s %s after each reference element.\n', strs(1), list, strs(2)); + end + + % Mean information + if hasMean && ~isequal(meanIndices,0) + weighted = sprintf('\b'); + if obj.hasWeights(d) && obj.meanSize(d)>1 + weighted = 'weighted'; + end + strs = ["elements", "data indices"]; + if numel(meanIndices)==1 + strs(1) = "element"; + end + if isequal(abs(meanIndices),1) + strs(2) = "index"; + end + list = dash.messageList(meanIndices); + fprintf('\t\tIt takes a %s mean over the data %s %s %s after each sequence element.\n', weighted, strs(1), list, strs(2)); + end + end + end +end + +end \ No newline at end of file diff --git a/@stateVectorVariable/matchIndices.m b/@stateVectorVariable/matchIndices.m new file mode 100644 index 00000000..6c8a4d89 --- /dev/null +++ b/@stateVectorVariable/matchIndices.m @@ -0,0 +1,30 @@ +function[obj] = matchIndices(obj, meta, grid, dim) +%% Updates indices along an ensemble dimension to match metadata. +% +% obj = obj.matchIndices(meta, grid, dim) +% +% ----- Inputs ----- +% +% meta: The metadata to match +% +% grid: The gridfile object associated with the stateVectorVariable +% +% dim: The name of the dimension. A string +% +% ----- Outputs ----- +% +% obj: The updated stateVectorVariable object. + +% Get the variable's metadata. 
Find the metadata intersect +varMeta = obj.dimMetadata(grid, dim); +[~, keep] = intersect(varMeta, meta, 'rows', 'stable'); + +% Update the reference indices. Update metadata and size +d = obj.checkDimensions(dim); +obj.indices{d} = obj.indices{d}(keep); +if obj.hasMetadata(d) + obj.metadata{d} = obj.metadata{d}(keep, :); +end +obj.ensSize(d) = numel(obj.indices{d}); + +end \ No newline at end of file diff --git a/@stateVectorVariable/mean.m b/@stateVectorVariable/mean.m new file mode 100644 index 00000000..35e89c77 --- /dev/null +++ b/@stateVectorVariable/mean.m @@ -0,0 +1,128 @@ +function[obj] = mean(obj, dims, indices, omitnan) +%% Specifies options for taking a mean over dimensions. +% +% obj = obj.mean(stateDim) +% obj = obj.mean(stateDim, []); +% Take a mean over a state dimension. +% +% obj = obj.mean(ensDim, indices); +% Specify how to take a mean over an ensemble dimension. +% +% obj = obj.mean(dims, indexCell) +% Specify how to take a mean over multiple dimensions. +% +% obj = obj.mean(..., nanflag) +% obj = obj.mean(..., omitnan) +% Specify how to treat NaN values when taking a mean +% +% ----- Inputs ----- +% +% stateDim: The name of a state dimension for the variable. A string. +% +% ensDim: The name of an ensemble dimension for the variable. A string. +% +% dims: The names of multiple dimensions. A string vector or cellstring +% vector. May not repeat dimension names. +% +% indices: Mean indices for an ensemble dimension. A vector of integers +% that indicates the position of mean data-elements relative to the +% sequence data-elements. 0 indicates a sequence data-element. 1 is the +% data-element following a sequence data-element. -1 is the data-element +% before a sequence data-element, etc. Mean indices may be in any order +% and cannot have a magnitude larger than the length of the dimension in +% the .grid file. +% +% indexCell: A cell vector. Each element contains mean indices for one +% dimension listed in dims. 
Must be in the same order as dims. Use an +% empty array for elements corresponding to state dimensions. +% +% nanflag: Options are "includenan" to use NaN values (default) and +% "omitnan" to remove NaN values. Use a string scalar to specify an +% option for all dimensions listed in dims. Use a string vector to +% specify different options for the different dimensions listed in dims. +% +% omitnan: If false (default) includes NaN values in a mean. If true, +% removes NaN values. Use a scalar logical to use the same option for +% all dimensions listed in dims. Use a logical vector to specify +% different options for the different dimensions listed in dims. +% +% ----- Outputs ----- +% +% obj: The updated stateVectorVariable object. + +% Error check, dimension index. +d = []; +if ~isempty(dims) + [d, dims] = obj.checkDimensions(dims); +end +nDims = numel(d); + +% Defaults, error check, parse indices +if ~exist('indices','var') || isempty(indices) + indices = cell(1, nDims); +end +[indices, wasCell] = obj.parseInputCell(indices, nDims, 'indexCell'); + +% Default, parse, error check omitnan +if ~exist('omitnan','var') || isempty(omitnan) + omitnan = false; +end +omitnan = obj.parseLogicalString(omitnan, nDims, 'omitnan', 'nanflag', ["omitnan","includenan"], 1, 'NaN options'); + +% Ensemble dimensions. Require indices +name = 'indices'; +for k = 1:nDims + if ~obj.isState(d(k)) + if isempty(indices{k}) + ensMissingIndicesError(obj, dims(k)); + end + + % Error check indices. Save + if wasCell + name = sprintf('Element %.f of indexCell', k); + end + obj.assertAddIndices(indices{k}, d(k), name); + obj.mean_Indices{d(k)} = indices{k}(:); + + % Check that the mean indices do not disrupt a weighted mean + meanSize = numel(indices{k}); + if obj.hasWeights(d(k)) && meanSize~=obj.meanSize(d(k)) + weightsNumberError(obj, dims(k), meanSize, obj.meanSize(d(k))); + end + + % State dimensions. Indices not allowed. Update size and mean size. 
+    else
+        if ~isempty(indices{k})
+            stateHasIndicesError(obj, dims(k));
+        end
+        meanSize = numel(obj.indices{d(k)}); % count the state indices: stateSize is already 1 if a mean was set earlier
+        obj.stateSize(d(k)) = 1;
+    end
+
+    % Update mean Size
+    obj.meanSize(d(k)) = meanSize;
+end
+
+% Update general mean properties
+obj.takeMean(d) = true;
+obj.omitnan(d) = omitnan;
+
+end
+
+% Long error messages
+function[] = stateHasIndicesError(obj, dim)
+error(['Only ensemble dimensions can have mean indices, but "%s" is a ',...
+    'state dimension in variable %s. To make %s an ensemble dimension, ',...
+    'see "stateVector.design".'], dim, obj.name, dim);
+end
+function[] = ensMissingIndicesError(obj, dim)
+error(['"%s" is an ensemble dimension in variable %s, so you must specify ',...
+    'mean indices in order to take a mean.'], dim, obj.name);
+end
+function[] = weightsNumberError(obj, dim, newSize, oldSize)
+error(['The "%s" dimension of variable "%s" is being used in a weighted ',...
+    'mean, but the number of mean indices (%.f) does not match the number ',...
+    'of weights (%.f). Either specify %.f mean indices or reset the mean ',...
+    'options using "stateVector.resetMeans".'], dim, obj.name, newSize, ...
+    oldSize, oldSize);
+end
\ No newline at end of file
diff --git a/@stateVectorVariable/parseInputCell.m b/@stateVectorVariable/parseInputCell.m
new file mode 100644
index 00000000..da2ddbeb
--- /dev/null
+++ b/@stateVectorVariable/parseInputCell.m
@@ -0,0 +1,30 @@
+function[input, wasCell] = parseInputCell(input, nDims, name)
+%% Parses inputs that may either be an input, or a cell vector of inputs.
+% Returns the input as a cell. Throws a custom error message if cell
+% vectors are incorrect.
+%
+% [input, wasCell] = stateVectorVariable.parseInputCell(input, nDims, name)
+%
+% ----- Inputs -----
+%
+% input: The input being parsed
+%
+% nDims: The number of input dimensions.
+%
+% name: The name of the input. A string.
+%
+% ----- Outputs -----
+%
+% input: The input as a cell
+%
+% wasCell: Scalar logical.
Whether the input was a cell or not + +if nDims>1 || iscell(input) + dash.assertVectorTypeN(input, 'cell', nDims, name); + wasCell = true; +else + input = {input}; + wasCell = false; +end + +end \ No newline at end of file diff --git a/@stateVectorVariable/parseLogicalString.m b/@stateVectorVariable/parseLogicalString.m new file mode 100644 index 00000000..0cd52642 --- /dev/null +++ b/@stateVectorVariable/parseLogicalString.m @@ -0,0 +1,53 @@ +function[input] = parseLogicalString(input, nDims, logicalName, stringName, allowedStrings, lastTrue, name) +%% Parses inputs that can either be a logical or string. Returns the input +% as a logical. Throws custom error messages. +% +% input = stateVectorVariable.parseLogicalString( ... +% input, nDims, logicalName, stringName, allowedStrings, lastTrue) +% +% ----- Inputs ----- +% +% input: The input being parsed +% +% nDims: The number of input dimensions. +% +% logicalName: The name of a logical input +% +% stringName: The name of a string input +% +% allowedStrings: Options for the string input. Should be organized such +% that all strings equivalent to a logical true are first. +% +% lastTrue: The index of the last string in allowedStrings equivalent to a +% logical true. 
+% +% ----- Outputs ----- +% +% input: The input as a logical + +% Default for empty calls +if nDims==0 && isempty(input) + input = false(0,1); +end + +% Logical +if islogical(input) + if ~isscalar(input) + dash.assertVectorTypeN(input, [], nDims, sprintf('Since %s is not a scalar, it', logicalName)); + end + +% Strings +elseif ischar(input) || isstring(input) || iscellstr(input) + input = string(input); + if ~isscalar(input) + dash.assertVectorTypeN(input, [], nDims, sprintf('Since %s is not a string scalar, it', stringName)); + end + k = dash.checkStrsInList(input, allowedStrings, stringName, 'recognized flag'); + input = k<=lastTrue; + +% Anything else +else + error('%s must either be logicals or strings.', name); +end + +end \ No newline at end of file diff --git a/@stateVectorVariable/removeOverlap.m b/@stateVectorVariable/removeOverlap.m new file mode 100644 index 00000000..e45060e6 --- /dev/null +++ b/@stateVectorVariable/removeOverlap.m @@ -0,0 +1,84 @@ +function[subMembers] = removeOverlap(obj, subMembers, dims) +%% Updates a set of subscripted ensemble members to only include members +% non-overlapping data. +% +% subMembers = obj.removeOverlap(subMembers, dims) +% Removes any ensemble members with overlapping data from a set of +% subscripted ensemble members. +% +% ----- Inputs ----- +% +% subMembers: A set of subscripted ensemble member indices. +% +% dims: The dimension associated with each column of subMembers. A string +% vector with one element per column in subMembers. +% +% ----- Outputs ----- +% +% subMembers: The updated set of subscripted ensemble members. 
Any ensemble +% member with overlapping data is deleted from the array + +% Get the dimension indices and sizes +d = obj.checkDimensions(dims); +nDims = numel(d); +nEns = size(subMembers, 1); + +% Get add indices for all ensemble dimensions +nAdd = NaN(1, nDims); +addIndices = cell(1, nDims); +for k = 1:nDims + addIndices{k} = obj.addIndices(d(k)); + nAdd(k) = numel(addIndices{k}); +end + +% Get subscript indices to propagate add indices over all ensemble dimensions +nEls = prod(nAdd); +addindexIndices = cell(1, nDims); +[addindexIndices{:}] = ind2sub( nAdd, (1:nEls)' ); +addindexIndices = cell2mat(addindexIndices); + +% Get subscripted add indices and subscripted reference indices +subAddIndices = NaN(nEls, nDims); +subRefIndices = NaN(nEns, nDims); +for k = 1:nDims + subAddIndices(:,k) = addIndices{k}(addindexIndices(:,k)); + subRefIndices(:,k) = obj.indices{d(k)}(subMembers(:,k)); +end + +% Replicate the add indices over the reference indices and vice versa. +subAddIndices = repmat(subAddIndices, [nEns, 1]); +subRefIndices = repmat( subRefIndices(:)', [nEls, 1]); +subRefIndices = reshape(subRefIndices, [nEns*nEls, nDims]); + +% Find overlapping data and iteratively remove ensemble members until no +% overlap occurs +overlap = findOverlap( subRefIndices, subAddIndices, nEls ); +while ~isempty(overlap) + r = ceil(overlap(1)/nEls); + remove = (r-1)*nEls + (1:nEls); + subRefIndices(remove, :) = []; + subAddIndices(remove, :) = []; + subMembers(r, :) = []; + overlap = findOverlap(subRefIndices, subAddIndices, nEls); +end + +end + +% DRY helper method +function[overlap] = findOverlap(subRefIndices, subAddIndices, nEls) + +% Get the load indices and their associated ensemble member +loadIndices = subRefIndices + subAddIndices; +index = 1:size(loadIndices,1); +member = ceil(index/nEls); + +% Find all load indices that are not the first occurence +[~, first, map] = unique(loadIndices, 'rows', 'stable'); +overlap = index(~ismember(index, first)); + +% Allow overlap in the 
same ensemble member as the first occurrence
+overlapMember = member(overlap);
+firstMember = member(first(map(overlap)));
+overlap(overlapMember==firstMember) = [];
+
+end
\ No newline at end of file
diff --git a/@stateVectorVariable/rename.m b/@stateVectorVariable/rename.m
new file mode 100644
index 00000000..3a3fce43
--- /dev/null
+++ b/@stateVectorVariable/rename.m
@@ -0,0 +1,18 @@
+function[obj] = rename(obj, newName)
+%% Changes the name of a state vector variable
+%
+% obj = obj.rename(newName)
+%
+% ----- Inputs -----
+%
+% newName: The new name of the variable. A string scalar or character row
+%    vector.
+%
+% ----- Outputs -----
+%
+% obj: The updated stateVectorVariable
+
+% Change name. Use string internally
+obj.name = string(newName);
+
+end
\ No newline at end of file
diff --git a/@stateVectorVariable/resetMeans.m b/@stateVectorVariable/resetMeans.m
new file mode 100644
index 00000000..eefe0d0e
--- /dev/null
+++ b/@stateVectorVariable/resetMeans.m
@@ -0,0 +1,36 @@
+function[obj] = resetMeans(obj, dims)
+%% Resets specifications for means for a stateVectorVariable
+%
+% obj = obj.resetMeans
+% Resets mean options for all dimensions
+%
+% obj = obj.resetMeans(dims)
+% Resets mean options for specified dimensions
+%
+% ----- Outputs -----
+%
+% obj: The updated stateVectorVariable object
+
+% Default for no dims
+if ~exist('dims','var') || isempty(dims)
+    d = 1:numel(obj.dims);
+else
+    d = obj.checkDimensions(dims);
+end
+nDims = numel(d);
+
+% State size. Restore the pre-mean length of state dimensions that were averaged
+stateMean = d(obj.isState(d) & obj.takeMean(d)); % convert the mask (relative to d) into absolute dimension indices
+obj.stateSize(stateMean) = obj.meanSize(stateMean);
+
+% Mean properties
+obj.takeMean(d) = false;
+obj.meanSize(d) = NaN;
+obj.omitnan(d) = false;
+obj.mean_Indices(d) = cell(1, nDims);
+
+% Weighted means
+obj.hasWeights(d) = false;
+obj.weightCell(d) = cell(1, nDims);
+
+end
\ No newline at end of file
diff --git a/@stateVectorVariable/resetMetadata.m b/@stateVectorVariable/resetMetadata.m
new file mode 100644
index 00000000..06a4ccb1
---
/dev/null +++ b/@stateVectorVariable/resetMetadata.m @@ -0,0 +1,29 @@ +function[obj] = resetMetadata(obj, dims) +%% Resets specified metadata and metadata conversion options for a variable. +% +% obj = obj.resetMetadata +% Resets metadata options for all dimensions. +% +% obj = obj.resetMetadata(dims) +% Resets metadata options for specified dimensions. +% +% ----- Outputs ----- +% +% obj: The updated stateVectorVariable object + +% Default for no dims +if ~exist('dims','var') || isempty(dims) + d = 1:numel(obj.dims); +else + d = obj.checkDimensions(dims); +end + +% Update +nDims = numel(d); +obj.hasMetadata(d) = false; +obj.metadata(d) = cell(1, nDims); +obj.convert(d) = false; +obj.convertFunction(d) = cell(1, nDims); +obj.convertArgs(d) = cell(1, nDims); + +end \ No newline at end of file diff --git a/@stateVectorVariable/sequence.m b/@stateVectorVariable/sequence.m new file mode 100644 index 00000000..93d09b92 --- /dev/null +++ b/@stateVectorVariable/sequence.m @@ -0,0 +1,88 @@ +function[obj] = sequence(obj, dims, indices, metadata) +%% Specifies to use a sequence of data for ensemble dimensions. +% +% obj = obj.sequence(dim, indices, metadata) +% Designs a sequence for an ensemble dimension and specifies sequence +% metadata. +% +% obj = obj.sequence(dims, indexCell, metadataCell) +% Designs a sequence and specifies metadata for multiple ensemble +% dimensions. +% +% ----- Inputs ----- +% +% dim(s): The name(s) of ensemble dimension(s) in the .grid file for the +% variable. A string vector, cellstring vector, or character row vector. +% May not repeat dimension names. +% +% indices: The sequence indices. A vector of integers that indicates the +% position of sequence data-elements relative to the reference indices. +% 0 indicates the reference index. 1 is the data index following the +% reference index. -1 is the data index before the reference index, etc. 
+%    Sequence indices may be in any order and cannot have a magnitude
+%    larger than the length of the dimension in the .grid file.
+%
+% indexCell: A cell vector. Each element contains the sequence indices for
+%    one dimension listed in dims. Must be in the same dimension order as
+%    dims.
+%
+% metadata: Metadata for the sequence. Either a vector with one element per
+%    sequence index or an array with one row per sequence index.
+%
+% metadataCell: A cell vector. Each element contains the metadata for one
+%    dimension listed in dims. Must be in the same dimension order as dims
+%
+% ----- Output -----
+%
+% obj: The updated stateVectorVariable object.
+
+% Error check the dimensions. Only ensemble dimensions are allowed
+[d, dims] = obj.checkDimensions(dims);
+if any(obj.isState(d))
+    bad = d(find(obj.isState(d),1));
+    stateDimensionError(obj, bad);
+end
+nDims = numel(d);
+
+% Parse indices and metadata. Error check cell vectors
+[indices, wasCell] = obj.parseInputCell(indices, nDims, 'indexCell');
+metadata = obj.parseInputCell(metadata, nDims, 'metadataCell');
+
+% Error check indices for each dimension
+name = 'indices';
+for k = 1:nDims
+    if wasCell
+        name = sprintf('Element %.f of indexCell', k);
+    end
+    obj.assertAddIndices(indices{k}, d(k), name);
+
+    % Error check metadata. Vectors are treated as one element per sequence index
+    errorStrs = ["array", "row"]; % string array: char concatenation would merge the two words into 'arrayrow'
+    if isvector(metadata{k})
+        errorStrs = ["vector", "element"];
+        metadata{k} = metadata{k}(:);
+    end
+    if size(metadata{k},1)~=numel(indices{k})
+        metadataSizeError( obj, dims(k), errorStrs, numel(indices{k}), size(metadata{k},1) );
+    end
+
+    % Update
+    obj.stateSize(d(k)) = numel(indices{k});
+    obj.seqIndices{d(k)} = indices{k}(:);
+    obj.seqMetadata{d(k)} = metadata{k};
+end
+
+end
+
+% Long error messages
+function[] = stateDimensionError(obj, bad)
+error(['Only ensemble dimensions can have sequence indices, but %s ', ...
+    'is a state dimension in variable %s. To make %s an ',...
+    'ensemble dimension, see "stateVector.design".'], obj.dims(bad), ...
+ obj.name, obj.dims(bad)); +end +function[] = metadataSizeError(obj, dim, strs, nIndex, nRows) +error(['When metadata is a %s, it must have one %s per sequence index (%.f), ',... + 'but the metadata for dimension %s in variable %s currently has %.f %ss.'], ... + strs(1), strs(2), nIndex, dim, obj.name, nRows, strs(2)); +end \ No newline at end of file diff --git a/@stateVectorVariable/specifyMetadata.m b/@stateVectorVariable/specifyMetadata.m new file mode 100644 index 00000000..8b53b770 --- /dev/null +++ b/@stateVectorVariable/specifyMetadata.m @@ -0,0 +1,49 @@ +function[obj] = specifyMetadata(obj, dim, metadata) +%% Specify metadata at the ensemble reference indices along a dimension +% +% obj = obj.specifyMetadata(dim, metadata) +% +% ----- Inputs ----- +% +% dim: The name of the dimension for which metadata is provided. A string +% +% metadata: Metadata at the reference indices for an ensemble dimension. +% Metadata may be numeric, logical, char, string, cellstring, or +% datetime matrix. Must have one row per reference index. Each row must +% be unique and cannot contain NaN or NaT elements. Cellstring +% metadata will be converted into the "string" type. +% +% ----- Outputs ----- +% +% obj: The updated stateVectorVariable object + +% Error check, dimension index. Cannot conflict with metadata conversion +d = obj.checkDimensions(dim, false); +if any(obj.convert(d)) + previousMetadataError(obj, d); +end + +% Check the metadata is an allowed gridfile type. Convert cellstrings +metadata = gridfile.checkMetadataField(metadata, dim); + +% Check the rows match the number of reference indices +if size(metadata,1) ~= obj.ensSize(d) + error(['The metadata for the "%s" dimension of variable "%s" must have ',... + 'one row per reference index (%.f rows), but it has %.f rows instead.'], ... 
+ dim, obj.name, obj.ensSize(d), size(metadata,1)); +end + +% Update +obj.hasMetadata(d) = true; +obj.metadata{d} = metadata; + +end + +% Error message. The format string fragments are concatenated with [] so that error receives a single format specification +function[] = previousMetadataError(obj, d) +bad = d(find(obj.convert(d),1)); +error(['Cannot specify metadata for the "%s" dimension of variable "%s" ',... 'because you previously specified a metadata conversion function ',... 'for this dimension. You may want to reset the metadata options ', ... 'using "stateVector.resetMetadata".'], obj.dims(bad), obj.name); +end \ No newline at end of file diff --git a/@stateVectorVariable/stateVectorVariable.m b/@stateVectorVariable/stateVectorVariable.m new file mode 100644 index 00000000..cbec4d08 --- /dev/null +++ b/@stateVectorVariable/stateVectorVariable.m @@ -0,0 +1,147 @@ +classdef stateVectorVariable + % This class implements a custom structure to hold design parameters + % for a variable in a state vector. + + properties (SetAccess = private) + % Set by constructor + name; % The identifying name of the variable + file; % Name of the .grid file + dims; % .grid file dimension order + gridSize; % .grid file size + + % Design + stateSize; % Length of each dimension in the state vector + ensSize; % Number of possible ensemble members + isState; % Whether a dimension is a state dimension + indices; % State indices or ensemble reference indices, as appropriate + + % Sequences + % size; + seqIndices; % Sequence indices + seqMetadata; % Sequence metadata + + % Means + takeMean; % Whether to take a mean over a dimension + meanSize; % Number of elements used in the mean along each dimension + omitnan; % Whether to exclude NaN values + mean_Indices; % Mean indices for ensemble dimensions + + % Weighted means + hasWeights; % Whether the dimension has weights + weightCell; % Weights for each dimension + + % Metadata + hasMetadata; % Whether metadata was specified for the dimension (set by specifyMetadata) + metadata; % User-specified metadata for each dimension + convert; % Whether a metadata conversion function is set for the dimension + convertFunction; % Metadata conversion function for each dimension + convertArgs; % NOTE(review): presumably extra arguments for convertFunction -- confirm in convertMetadata + end + + properties (Hidden, Constant) + infoFields =
{'name','gridfile','stateSize','possibleMembers',... 'dimensions','stateDimensions','ensembleDimensions','singletonDimensions'}; + end + + % Constructor + methods + function obj = stateVectorVariable(varName, file) + % Creates a new stateVectorVariable object for data in a .grid + % file. + % + % obj = stateVectorVariable(varName, file); + % + % ----- Inputs ----- + % + % varName: The name of the variable. A string. + % + % file: The name of the .grid file that holds the variable + % + % ----- Outputs ----- + % + % obj: The new stateVectorVariable object + + % Error check. Use the values returned by the dash checks + varName = dash.assertStrFlag(varName, 'varName'); + file = dash.assertStrFlag(file, 'file'); + file = dash.checkFileExists(file); + + % Name. Use string internally + obj.name = string(varName); + + % Get gridfile properties + grid = gridfile(file); + obj.file = grid.file; + obj.dims = grid.dims; + obj.gridSize = grid.size; + + % Initialize dimension properties. Each has one element per .grid file dimension + nDims = numel(obj.dims); + obj.stateSize = NaN(1, nDims); + obj.ensSize = NaN(1, nDims); + obj.isState = true(1, nDims); + obj.indices = cell(1, nDims); + + obj.seqIndices = cell(1, nDims); + obj.seqMetadata = cell(1, nDims); + + obj.takeMean = false(1, nDims); + obj.meanSize = NaN(1, nDims); + obj.omitnan = false(1, nDims); + obj.mean_Indices = cell(1, nDims); + + obj.hasWeights = false(1, nDims); + obj.weightCell = cell(1, nDims); + + obj.hasMetadata = false(1, nDims); + obj.metadata = cell(1, nDims); + obj.convert = false(1, nDims); + obj.convertFunction = cell(1, nDims); + obj.convertArgs = cell(1, nDims); + + % Initialize all dimensions as state dimensions by calling design on each one + for d = 1:numel(obj.dims) + obj = obj.design(obj.dims(d), 'state'); + end + end + end + + % Object utilities (declared here, implemented in separate files of the class folder) + methods + [d, dims] = checkDimensions(obj, dims, allowMultiple); + assertAddIndices(obj, indices, d, name); + checkGrid(obj, grid); + obj = trim(obj); + meta = dimMetadata(obj, grid, dim); + obj = matchIndices(obj, meta, grid, dim); + addIndex = addIndices(obj, d);
+ subMembers = removeOverlap(obj, subMembers, dims); + end + + % Static utilities + methods (Static) + [input, wasCell] = parseInputCell(input, nDims, name); + input = parseLogicalString(input, nDims, logicalName, stringName, allowedStrings, lastTrue, name); + end + + % Interface methods + methods + obj = design(obj, dims, type, indices); + obj = sequence(obj, dims, indices, metadata); + + obj = mean(obj, dims, indices, omitnan); + obj = weightedMean(obj, dims, weights); + obj = resetMeans(obj, dims); + + obj = specifyMetadata(obj, dim, metadata); + obj = convertMetadata(obj, dim, convertFunction, functionArgs); + obj = resetMetadata(obj, dims); + + dims = dimensions(obj, type); + [varInfo, dimInfo] = info(obj); + obj = rename(obj, newName); + + X = buildEnsemble(obj, subMembers, dims, grid, sources); + end +end + \ No newline at end of file diff --git a/@stateVectorVariable/trim.m b/@stateVectorVariable/trim.m new file mode 100644 index 00000000..83e7e681 --- /dev/null +++ b/@stateVectorVariable/trim.m @@ -0,0 +1,33 @@ +function[obj] = trim(obj) +%% Adjusts reference indices to only allow complete sequences and means. +% +% obj = obj.trim +% +% ----- Outputs ----- +% +% obj: The updated stateVectorVariable + +% Get the length of each ensemble dimension in the .grid file +d = find(~obj.isState); +for k = 1:numel(d) + dimLength = obj.gridSize(d(k)); + + % Get the largest and smallest add indices (maxSubtract is the minimum add index) + addIndices = obj.addIndices(d(k)); + maxAdd = max(addIndices); + maxSubtract = min(addIndices); + + % Find reference indices whose offsets would exceed the dimension length or precede its first element + tooLong = (obj.indices{d(k)} + maxAdd) > dimLength; + tooShort = (obj.indices{d(k)} + maxSubtract) < 1; + remove = tooLong | tooShort; + + % Remove the indices.
Update metadata and size + obj.indices{d(k)}(remove) = []; + if obj.hasMetadata(d(k)) + obj.metadata{d(k)}(remove,:) = []; + end + obj.ensSize(d(k)) = numel(obj.indices{d(k)}); +end + +end \ No newline at end of file diff --git a/@stateVectorVariable/weightedMean.m b/@stateVectorVariable/weightedMean.m new file mode 100644 index 00000000..ff99e906 --- /dev/null +++ b/@stateVectorVariable/weightedMean.m @@ -0,0 +1,104 @@ +function[obj] = weightedMean(obj, dims, weights) +%% Specify options for taking a weighted mean over dimensions. +% +% obj = obj.weightedMean(dim, weights) +% Takes a weighted mean over a dimension. +% +% obj = obj.weightedMean(dims, weightCell) +% obj = obj.weightedMean(dims, weightArray) +% Takes a weighted mean over multiple dimensions. +% +% ----- Inputs ----- +% +% dim: The name of a dimension over which to take a weighted mean. A string +% +% weights: A numeric vector containing the mean weights. If dim is a state +% dimension, must have a length equal to the number of state indices. +% If dim is an ensemble dimension, the length must be equal to the +% number of mean indices. (See stateVector.info to summarize dimension +% properties). May not contain NaN, Inf, or complex numbers. +% +% weightCell: A cell vector. Each element contains mean weights for one +% dimension listed in dims. Must be in the same order as dims. If an +% element of weightCell is an empty array, uses equal weights for +% elements along the associated dimension. +% +% weightArray: An N-dimensional numeric array containing weights for taking +% a mean across specified dimensions. Must have a dimension for each +% dimension listed in dims and must have the same dimension order as +% dims. The length of each dimension of weightArray must be equal to +% either the number of state indices or mean indices, as appropriate. +% (See the "weights" input for details). May not contain NaN, Inf, or complex numbers.
+% +% ----- Outputs ----- +% +% obj: The updated stateVectorVariable object + +% Error check, dimension index +[d, dims] = obj.checkDimensions(dims); +nDims = numel(d); + +% Add any new dimensions to mean. Note that weights exist +obj = obj.mean( dims(~obj.takeMean(d)) ); +obj.hasWeights(d) = true; + +% Error check weightArray +if nDims>1 && isnumeric(weights) + dash.assertRealDefined(weights, 'weightArray'); + + % Check there are no more than nDims non-singleton dimensions + siz = size(weights); + last = max([1, find(siz~=1, 1, 'last')]); + if last > nDims + tooManyDimsError(obj, last, nDims); + end + + % Get the size in all specified dims (pad trailing singletons up to nDims). Check they match dimension sizes + siz(last+1:end) = []; + siz(last+1:nDims) = 1; + if ~isequal(siz, obj.meanSize(d)) + bad = find(siz~=obj.meanSize(d),1); + incorrectLengthError(obj, obj.dims(d(bad)), bad, siz(bad), obj.meanSize(d(bad))); + end + + % Permute to match internal order. Break into weightCell by summing over the other weighted dimensions. Save + weights = dash.permuteToOrder(weights, d, numel(obj.dims)); + for k = 1:nDims + weightVector = sum(weights, d([1:k-1,k+1:end])); + obj.weightCell{d(k)} = weightVector(:); + end + +% Parse and error check weightCell +else + [weights, wasCell] = obj.parseInputCell(weights, nDims, 'weightCell'); + name = 'weights'; + + % If the input weights for a dimension are empty, this is an unweighted mean. Test the input, not the stored weightCell + for k = 1:nDims + if isempty(weights{k}) + obj.hasWeights(d(k)) = false; + + % Otherwise, error check weights and update + else + if wasCell + name = sprintf('Element %.f of weightCell', k); + end + dash.assertVectorTypeN(weights{k}, 'numeric', obj.meanSize(d(k)), name); + dash.assertRealDefined(weights{k}, name); + obj.weightCell{d(k)} = weights{k}(:); + end + end +end + +end + +% Long error messages +function[] = tooManyDimsError(obj, last, nDims) +error(['weightArray for variable "%s" should have %.f dimensions, but it ', ...
+ 'has %.f instead.'], obj.name, nDims, last); +end +function[] = incorrectLengthError(obj, dim, bad, newSize, oldSize) % oldSize is reported as the required length, newSize as the length actually provided +error(['Dimension %.f of weightArray (%s) for variable "%s" must have ',... '%.f elements, but it has %.f elements instead.'], bad, dim, ... obj.name, oldSize, newSize); +end \ No newline at end of file diff --git a/dataSource.m b/dataSource.m index ab68b640..d7389b6f 100644 --- a/dataSource.m +++ b/dataSource.m @@ -57,9 +57,9 @@ % additive constant (b). % Error check strings, vectors - dash.assertStrFlag(file, "file"); - dash.assertStrFlag(var, "var"); - dash.assertStrList(dims, "dims"); + file = dash.assertStrFlag(file, "file"); + var = dash.assertStrFlag(var, "var"); + dims = dash.assertStrList(dims, "dims"); file = dash.checkFileExists(file); % Error check the post-processing values @@ -80,7 +80,7 @@ % Save properties obj.file = file; obj.var = var; - obj.unmergedDims = string(dims); + obj.unmergedDims = dims; obj.fill = fill; obj.range = range; obj.convert = convert;