forked from apache/arrow
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
apacheGH-38417: [MATLAB] Implement a
TableTypeValidator
class that …
…validates a MATLAB `cell` array contains only `table`s that share the same schema (apache#38551) ### Rationale for this change This is a followup to apache#38533. Adding this `TableValidator` class is a step towards implementing the `arrow.array.ListArray.fromMATLAB` method for creating `ListArray`s whose `ValueType` is a `StructArray`. This validator will ensure all `table`s in a `cell` array have the same schema when attempting to make a `ListArray` of `Struct`s. This is a requirement to ensure the `table`s in the `cell` array are vertcat'ble. For example, two `table`s with different `VariableNames` cannot be concatenated together: ```matlab >> t1 = table(1, 2, VariableNames=["A", "B"]); >> t2 = table(3, 4, VariableNames=["C", "D"]); >> vertcat(t1, t2) Error using tabular/vertcat All tables being vertically concatenated must have the same variable names. ``` ### What changes are included in this PR? Modified `arrow.array.internal.list.Validator` to inherit from `matlab.mixin.Heterogeneous`. Doing so enables creating an array whose elements are different subclasses of `arrow.array.internal.list.Validator`. Added a new MATLAB class `arrow.array.internal.list.TableValidator`, which inherits from `arrow.array.internal.list.Validator`. This class has two properties: `VariableNames` and `VariableValidators`. `VariableNames` is a `string` array containing the expected variable names of all `table`s. `VariableValidators` is an array of `arrow.array.internal.list.Validator`, in which each element represents one variable in a `table`. This array is used to validate `table` variables have the expected type and configuration. `TableValidator`'s `validateElement` method uses both its `VariableNames` and `VariableValidators` properties to validate that the input argument provided is a `table` with the expected schema. If not, it throws an error. 
Lastly, I added a gateway function called `arrow.array.internal.list.createValidator`, which creates the appropriate `Validator` subclass based on the input. If no such `Validator` exists, an error is thrown. ### Are these changes tested? Yes. Added two new test classes: `tTableValidator.m` and `tCreateValidator.m`. ### Are there any user-facing changes? No. ### Future Directions: 1. apache#38354 * Closes: apache#38417 Authored-by: Sarah Gilmore <[email protected]> Signed-off-by: Kevin Gurney <[email protected]>
- Loading branch information
1 parent
0de4f43
commit 1de0407
Showing
5 changed files
with
550 additions
and
1 deletion.
There are no files selected for viewing
90 changes: 90 additions & 0 deletions
90
matlab/src/matlab/+arrow/+array/+internal/+list/TableValidator.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
% Licensed to the Apache Software Foundation (ASF) under one or more | ||
% contributor license agreements. See the NOTICE file distributed with | ||
% this work for additional information regarding copyright ownership. | ||
% The ASF licenses this file to you under the Apache License, Version | ||
% 2.0 (the "License"); you may not use this file except in compliance | ||
% with the License. You may obtain a copy of the License at | ||
% | ||
% http://www.apache.org/licenses/LICENSE-2.0 | ||
% | ||
% Unless required by applicable law or agreed to in writing, software | ||
% distributed under the License is distributed on an "AS IS" BASIS, | ||
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
% implied. See the License for the specific language governing | ||
% permissions and limitations under the License. | ||
|
||
classdef TableValidator < arrow.array.internal.list.ClassTypeValidator
%TABLEVALIDATOR Validates that a value is a table with an expected schema.
%
% The expected schema is captured from the table T supplied to the
% constructor: the table's variable names, plus one validator per
% variable created by arrow.array.internal.list.createValidator.

    properties (GetAccess=public, SetAccess=private)
        % Expected variable names, in order (1-by-N string array).
        VariableNames string = string.empty(1, 0)
        % One validator per expected variable; VariableValidators(ii)
        % validates the ii-th variable of each table.
        VariableValidators arrow.array.internal.list.Validator = arrow.array.internal.list.Validator.empty(1, 0)
    end

    methods
        function obj = TableValidator(T)
            %TABLEVALIDATOR Build a validator from the schema of table T.
            %
            % Throws "arrow:array:list:TableWithZeroVariables" if T has
            % no variables. createValidator may throw for variables of
            % an unsupported type.
            arguments
                T table
            end

            numVars = width(T);

            if (numVars == 0)
                error("arrow:array:list:TableWithZeroVariables", ...
                    "Expected table to have at least one variable.");
            end

            % Construct the superclass portion from an empty table.
            % NOTE(review): presumably ClassTypeValidator records the class
            % of its input ("table") as the expected class - confirm.
            obj@arrow.array.internal.list.ClassTypeValidator(table);
            obj.VariableNames = string(T.Properties.VariableNames);

            % Collect per-variable validators in a cell array first and
            % concatenate them afterwards into a single validator array.
            validators = cell([1 numVars]);
            for ii = 1:numVars
                validators{ii} = arrow.array.internal.list.createValidator(T.(ii));
            end

            obj.VariableValidators = [validators{:}];
        end

        function validateElement(obj, element)
            %VALIDATEELEMENT Error unless element matches the expected schema.
            %
            % Checks, in order: the superclass validation, the number of
            % variables, the variable names, and then each variable via
            % its corresponding entry in VariableValidators.

            % Verify element is a table
            validateElement@arrow.array.internal.list.ClassTypeValidator(obj, element);

            % Validate element has the expected number of variables
            numVars = numel(obj.VariableNames);
            if width(element) ~= numVars
                id = "arrow:array:list:NumVariablesMismatch";
                msg = "Expected all tables in the cell array to have " + ...
                    string(numVars) + " variables.";
                error(id, msg);
            end

            % Validate element has the expected variable names. The
            % width check above guarantees both sides have equal size.
            if ~all(obj.VariableNames == string(element.Properties.VariableNames))
                id = "arrow:array:list:VariableNamesMismatch";
                msg = "Expected all tables in the cell array to have the " + ...
                    "same variable names.";
                error(id, msg);
            end

            for ii=1:numVars
                var = element.(ii);

                % In order to concatenate tables together later, require
                % all non-tabular variables to be columnar or empty.
                if ~istable(var) && (~iscolumn(var) && ~isempty(var))
                    id = "arrow:array:list:NonTabularVariablesMustBeColumnar";
                    msg = "Expected all variables except for nested tables to be columnar.";
                    error(id, msg);
                end

                obj.VariableValidators(ii).validateElement(var);
            end
        end

        function len = getElementLength(~, element)
            %GETELEMENTLENGTH Return the number of rows in element.
            len = height(element);
        end

        function C = reshapeCellElements(~, C)
            %RESHAPECELLELEMENTS Return C unchanged.
            % NO-OP for cell array of tables
        end
    end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
matlab/src/matlab/+arrow/+array/+internal/+list/createValidator.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
% Licensed to the Apache Software Foundation (ASF) under one or more | ||
% contributor license agreements. See the NOTICE file distributed with | ||
% this work for additional information regarding copyright ownership. | ||
% The ASF licenses this file to you under the Apache License, Version | ||
% 2.0 (the "License"); you may not use this file except in compliance | ||
% with the License. You may obtain a copy of the License at | ||
% | ||
% http://www.apache.org/licenses/LICENSE-2.0 | ||
% | ||
% Unless required by applicable law or agreed to in writing, software | ||
% distributed under the License is distributed on an "AS IS" BASIS, | ||
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
% implied. See the License for the specific language governing | ||
% permissions and limitations under the License. | ||
|
||
function validator = createValidator(data)
%CREATEVALIDATOR Create the list-element validator that matches DATA.
%
% Numeric, logical, duration, string and cell inputs produce a
% ClassTypeValidator; datetime inputs produce a DatetimeValidator; table
% inputs produce a TableValidator. Any other input raises
% "arrow:array:list:UnsupportedDataType".
    import arrow.array.internal.list.ClassTypeValidator
    import arrow.array.internal.list.DatetimeValidator
    import arrow.array.internal.list.TableValidator

    % All of these types are handled by the same validator class.
    handledByClassType = isnumeric(data) || islogical(data) || ...
        isduration(data) || isstring(data) || iscell(data);

    if handledByClassType
        validator = ClassTypeValidator(data);
        return
    end

    if isdatetime(data)
        validator = DatetimeValidator(data);
        return
    end

    if istable(data)
        validator = TableValidator(data);
        return
    end

    % No validator exists for this data type.
    error("arrow:array:list:UnsupportedDataType", ...
        "Unable to create a ListArray from a cell array containing " + class(data) + " values.");
end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
%TCREATEVALIDATOR Unit tests for arrow.array.internal.list.createValidator. | ||
|
||
% Licensed to the Apache Software Foundation (ASF) under one or more | ||
% contributor license agreements. See the NOTICE file distributed with | ||
% this work for additional information regarding copyright ownership. | ||
% The ASF licenses this file to you under the Apache License, Version | ||
% 2.0 (the "License"); you may not use this file except in compliance | ||
% with the License. You may obtain a copy of the License at | ||
% | ||
% http://www.apache.org/licenses/LICENSE-2.0 | ||
% | ||
% Unless required by applicable law or agreed to in writing, software | ||
% distributed under the License is distributed on an "AS IS" BASIS, | ||
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
% implied. See the License for the specific language governing | ||
% permissions and limitations under the License. | ||
|
||
classdef tCreateValidator < matlab.unittest.TestCase
% Tests for the arrow.array.internal.list.createValidator gateway function.

    properties (TestParameter)
        % Numeric class names exercised by TestNumericTypes.
        NumericTypes
    end

    methods (TestParameterDefinition, Static)
        function NumericTypes = initializeNumericTypes()
            % Provide every numeric class name as a string scalar
            % parameter value.
            unsignedInts = ["uint8", "uint16", "uint32", "uint64"];
            signedInts = ["int8", "int16", "int32", "int64"];
            floats = ["single", "double"];
            NumericTypes = num2cell([unsignedInts, signedInts, floats]);
        end
    end

    methods (Test)
        function TestNumericTypes(testCase, NumericTypes)
            % A numeric array should yield a ClassTypeValidator whose
            % ClassName matches the numeric class of the input.
            value = cast(1, NumericTypes);
            created = arrow.array.internal.list.createValidator(value);
            testCase.checkClassTypeValidator(created, NumericTypes);
        end

        function TestLogical(testCase)
            % A logical array should yield a ClassTypeValidator whose
            % ClassName is "logical".
            value = true;
            created = arrow.array.internal.list.createValidator(value);
            testCase.checkClassTypeValidator(created, "logical");
        end

        function TestDuration(testCase)
            % A duration array should yield a ClassTypeValidator whose
            % ClassName is "duration".
            value = seconds(1);
            created = arrow.array.internal.list.createValidator(value);
            testCase.checkClassTypeValidator(created, "duration");
        end

        function TestString(testCase)
            % A string array should yield a ClassTypeValidator whose
            % ClassName is "string".
            value = "Hello World";
            created = arrow.array.internal.list.createValidator(value);
            testCase.checkClassTypeValidator(created, "string");
        end

        function TestCell(testCase)
            % A cell array should yield a ClassTypeValidator whose
            % ClassName is "cell".
            value = {"Hello World"};
            created = arrow.array.internal.list.createValidator(value);
            testCase.checkClassTypeValidator(created, "cell");
        end

        function TestDatetime(testCase)
            % A datetime array should yield a DatetimeValidator.
            value = datetime(2023, 10, 31);
            created = arrow.array.internal.list.createValidator(value);
            verifyInstanceOf(testCase, created, "arrow.array.internal.list.DatetimeValidator");
            verifyEqual(testCase, created.ClassName, "datetime");
            verifyEqual(testCase, created.Zoned, false);
        end

        function TestTable(testCase)
            % A table should yield a TableValidator that records the
            % variable names and holds one validator per variable.
            value = table(1, "A", VariableNames=["Number", "Letter"]);
            created = arrow.array.internal.list.createValidator(value);
            verifyInstanceOf(testCase, created, "arrow.array.internal.list.TableValidator");
            verifyEqual(testCase, created.VariableNames, ["Number", "Letter"]);
            verifyEqual(testCase, numel(created.VariableValidators), 2);
            testCase.checkClassTypeValidator(created.VariableValidators(1), "double");
            testCase.checkClassTypeValidator(created.VariableValidators(2), "string");
        end

        function UnsupportedDataTypeError(testCase)
            % An unsupported data type should raise an exception whose
            % identifier is "arrow:array:list:UnsupportedDataType".
            value = calyears(1);
            attempt = @() arrow.array.internal.list.createValidator(value);
            verifyError(testCase, attempt, "arrow:array:list:UnsupportedDataType");
        end
    end

    methods (Access=private)
        function checkClassTypeValidator(testCase, actual, expectedClassName)
            % Verify ACTUAL is a ClassTypeValidator whose ClassName
            % property equals EXPECTEDCLASSNAME.
            verifyInstanceOf(testCase, actual, "arrow.array.internal.list.ClassTypeValidator");
            verifyEqual(testCase, actual.ClassName, expectedClassName);
        end
    end
end
Oops, something went wrong.