From 0db1842e8bc2d218b49d1ac431a11e61c7ac9a29 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Mon, 23 Oct 2023 11:37:33 -0400 Subject: [PATCH] GH-37815: [MATLAB] Add `arrow.array.ListArray` MATLAB class (#38357) ### Rationale for this change Now that many of the commonly-used "primitive" array types have been added to the MATLAB interface, we can implement an `arrow.array.ListArray` class. This pull request adds a new `arrow.array.ListArray` class which can be converted to a MATLAB `cell` array by calling the `toMATLAB` method. ### What changes are included in this PR? 1. Added a new `arrow.array.ListArray` MATLAB class. *Methods* `cellArray = listArray.toMATLAB()` `listArray = arrow.array.ListArray.fromArrays(offsets, values)` *Properties* `Offsets` - `Int32Array` list offsets (uses zero-based indexing) `Values` - Array of values in the list (supports nesting) 2. Updated the existing `arrow.type.traits.ListTraits` MATLAB class to reference `arrow.array.ListArray`. **Example** ```matlab >> offsets = arrow.array(int32([0, 2, 3, 7])) offsets = [ 0, 2, 3, 7 ] >> values = arrow.array(["A", "B", "C", "D", "E", "F", "G"]) values = [ "A", "B", "C", "D", "E", "F", "G" ] >> arrowArray = arrow.array.ListArray.fromArrays(offsets, values) arrowArray = [ [ "A", "B" ], [ "C" ], [ "D", "E", "F", "G" ] ] >> matlabArray = arrowArray.toMATLAB() matlabArray = 3x1 cell array {2x1 string} {["C" ]} {4x1 string} >> matlabArray{:} ans = 2x1 string array "A" "B" ans = "C" ans = 4x1 string array "D" "E" "F" "G" ``` ### Are these changes tested? Yes. 1. Added a new `tListArray.m` test class. 2. Updated the existing `tListTraits.m` test class. 3. Updated `arrow.internal.test.tabular.createAllSupportedArrayTypes` to include `ListArray`. ### Are there any user-facing changes? Yes. 1. Users can now create an `arrow.array.ListArray` from an `offsets` and `values` array by calling the static `arrow.array.ListArray.fromArrays(offsets, values)` method. 
`ListArray`s can be converted into MATLAB `cell` arrays by calling the `toMATLAB` method on a `ListArray` instance (for example, `arrowArray.toMATLAB()`). ### Notes 1. We chose to use the "missing-class" `missing` value as the `NullSubstitutionValue` for the time being for `ListArray`. However, we eventually want to add `arrow.array.NullArray`, and will most likely want to use the "missing-class" `missing` value to represent `NullArray` values in MATLAB. So, this could cause some ambiguity in the future. We have been thinking about whether we should consider introducing some sort of special "sentinel value" to represent null values when converting to MATLAB `cell` arrays. Perhaps, something like `arrow.Null`, or something to that effect, in order to avoid this ambiguity. If we think it makes sense to do that, we may want to retroactively change the `NullSubstitutionValue` to be `arrow.Null` and break compatibility. Since we are still in pre-`0.1`, we don't think the impact of such a behavior change would be very large. 2. Implementing `ListArray` is fairly involved. So, in the spirit of incremental delivery, we chose not to include an implementation of `arrow.array.ListArray.fromMATLAB` in this initial pull request. We plan on following up with some more changes to `arrow.array.ListArray`. See #38353, #38354, and #38361. 3. Thank you @ sgilmore10 for your help with this pull request! ### Future Directions 1. #38353 2. #38354 3. #38361 4. Consider adding a null sentinel value like `arrow.Null` for conversion to MATLAB `cell` arrays. 
* Closes: #37815 Lead-authored-by: Kevin Gurney Co-authored-by: Sarah Gilmore Signed-off-by: Kevin Gurney --- .../arrow/matlab/array/proxy/list_array.cc | 103 +++++++++++ .../cpp/arrow/matlab/array/proxy/list_array.h | 38 ++++ .../src/cpp/arrow/matlab/array/proxy/wrap.cc | 3 + matlab/src/cpp/arrow/matlab/error/error.h | 1 + matlab/src/cpp/arrow/matlab/proxy/factory.cc | 2 + matlab/src/matlab/+arrow/+array/ListArray.m | 111 ++++++++++++ .../+tabular/createAllSupportedArrayTypes.m | 7 + .../matlab/+arrow/+type/+traits/ListTraits.m | 10 +- matlab/src/matlab/+arrow/array.m | 2 +- matlab/test/arrow/array/tListArray.m | 165 ++++++++++++++++++ matlab/test/arrow/type/traits/tListTraits.m | 10 +- .../cmake/BuildMatlabArrowInterface.cmake | 1 + 12 files changed, 444 insertions(+), 9 deletions(-) create mode 100644 matlab/src/cpp/arrow/matlab/array/proxy/list_array.cc create mode 100644 matlab/src/cpp/arrow/matlab/array/proxy/list_array.h create mode 100644 matlab/src/matlab/+arrow/+array/ListArray.m create mode 100644 matlab/test/arrow/array/tListArray.m diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/list_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/list_array.cc new file mode 100644 index 0000000000000..fc75e55dd6012 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/array/proxy/list_array.cc @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/matlab/array/proxy/list_array.h" +#include "arrow/matlab/array/proxy/numeric_array.h" +#include "arrow/matlab/array/proxy/wrap.h" +#include "arrow/matlab/error/error.h" +#include "libmexclass/proxy/ProxyManager.h" + +namespace arrow::matlab::array::proxy { + + ListArray::ListArray(std::shared_ptr list_array) : proxy::Array{std::move(list_array)} { + REGISTER_METHOD(ListArray, getValues); + REGISTER_METHOD(ListArray, getOffsets); + } + + libmexclass::proxy::MakeResult ListArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + using libmexclass::proxy::ProxyManager; + using Int32ArrayProxy = arrow::matlab::array::proxy::NumericArray; + using ListArrayProxy = arrow::matlab::array::proxy::ListArray; + using ArrayProxy = arrow::matlab::array::proxy::Array; + + mda::StructArray opts = constructor_arguments[0]; + const mda::TypedArray offsets_proxy_id_mda = opts[0]["OffsetsProxyID"]; + const mda::TypedArray values_proxy_id_mda = opts[0]["ValuesProxyID"]; + const mda::TypedArray validity_bitmap_mda = opts[0]["Valid"]; + + const auto offsets_proxy_id = offsets_proxy_id_mda[0]; + const auto values_proxy_id = values_proxy_id_mda[0]; + + const auto offsets_proxy = std::static_pointer_cast(ProxyManager::getProxy(offsets_proxy_id)); + const auto values_proxy = std::static_pointer_cast(ProxyManager::getProxy(values_proxy_id)); + + const auto offsets = offsets_proxy->unwrap(); + const auto values = values_proxy->unwrap(); + + // Pack the validity bitmap values. 
+ MATLAB_ASSIGN_OR_ERROR(auto validity_bitmap_buffer, + bit::packValid(validity_bitmap_mda), + error::BITPACK_VALIDITY_BITMAP_ERROR_ID); + + // Create a ListArray from values and offsets. + MATLAB_ASSIGN_OR_ERROR(auto array, + arrow::ListArray::FromArrays(*offsets, *values, arrow::default_memory_pool(), validity_bitmap_buffer), + error::LIST_ARRAY_FROM_ARRAYS_FAILED); + + // Return a ListArray Proxy. + auto list_array = std::static_pointer_cast(array); + return std::make_shared(std::move(list_array)); + } + + void ListArray::getValues(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + using libmexclass::proxy::ProxyManager; + + auto list_array = std::static_pointer_cast(array); + auto value_array = list_array->values(); + + // Wrap the array within a proxy object if possible. + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto value_array_proxy, + proxy::wrap(value_array), + context, error::UNKNOWN_PROXY_FOR_ARRAY_TYPE); + const auto value_array_proxy_id = ProxyManager::manageProxy(value_array_proxy); + const auto type_id = value_array->type_id(); + + // Return a struct with two fields: ProxyID and TypeID. The MATLAB + // layer will use these values to construct the appropriate MATLAB + // arrow.array.Array subclass. 
+ mda::ArrayFactory factory; + mda::StructArray output = factory.createStructArray({1, 1}, {"ProxyID", "TypeID"}); + output[0]["ProxyID"] = factory.createScalar(value_array_proxy_id); + output[0]["TypeID"] = factory.createScalar(static_cast(type_id)); + context.outputs[0] = output; + } + + void ListArray::getOffsets(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + using libmexclass::proxy::ProxyManager; + using Int32ArrayProxy = arrow::matlab::array::proxy::NumericArray; + auto list_array = std::static_pointer_cast(array); + auto offsets_array = list_array->offsets(); + auto offsets_int32_array = std::static_pointer_cast(offsets_array); + auto offsets_int32_array_proxy = std::make_shared(offsets_int32_array); + const auto offsets_int32_array_proxy_id = ProxyManager::manageProxy(offsets_int32_array_proxy); + mda::ArrayFactory factory; + context.outputs[0] = factory.createScalar(offsets_int32_array_proxy_id); + } +} diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/list_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/list_array.h new file mode 100644 index 0000000000000..8db6b6bf1d632 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/array/proxy/list_array.h @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/matlab/array/proxy/array.h" + +namespace arrow::matlab::array::proxy { + +class ListArray : public arrow::matlab::array::proxy::Array { + + public: + ListArray(std::shared_ptr list_array); + ~ListArray() {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void getValues(libmexclass::proxy::method::Context& context); + void getOffsets(libmexclass::proxy::method::Context& context); + + }; + +} diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc b/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc index b14f4b18711cb..8e300c959717a 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc @@ -21,6 +21,7 @@ #include "arrow/matlab/array/proxy/boolean_array.h" #include "arrow/matlab/array/proxy/numeric_array.h" #include "arrow/matlab/array/proxy/string_array.h" +#include "arrow/matlab/array/proxy/list_array.h" #include "arrow/matlab/array/proxy/struct_array.h" namespace arrow::matlab::array::proxy { @@ -62,6 +63,8 @@ namespace arrow::matlab::array::proxy { return std::make_shared>(std::static_pointer_cast(array)); case ID::STRING: return std::make_shared(std::static_pointer_cast(array)); + case ID::LIST: + return std::make_shared(std::static_pointer_cast(array)); case ID::STRUCT: return std::make_shared(std::static_pointer_cast(array)); default: diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h index 7dcecfa433567..a512fdcc89fa9 100644 --- a/matlab/src/cpp/arrow/matlab/error/error.h +++ b/matlab/src/cpp/arrow/matlab/error/error.h @@ -197,6 +197,7 @@ namespace arrow::matlab::error { static const char* CHUNKED_ARRAY_NUMERIC_INDEX_WITH_EMPTY_CHUNKED_ARRAY = "arrow:chunkedarray:NumericIndexWithEmptyChunkedArray"; static const char* 
CHUNKED_ARRAY_INVALID_NUMERIC_CHUNK_INDEX = "arrow:chunkedarray:InvalidNumericChunkIndex"; static const char* STRUCT_ARRAY_MAKE_FAILED = "arrow:array:StructArrayMakeFailed"; + static const char* LIST_ARRAY_FROM_ARRAYS_FAILED = "arrow:array:ListArrayFromArraysFailed"; static const char* INDEX_EMPTY_CONTAINER = "arrow:index:EmptyContainer"; static const char* INDEX_OUT_OF_RANGE = "arrow:index:OutOfRange"; static const char* BUFFER_VIEW_OR_COPY_FAILED = "arrow:buffer:ViewOrCopyFailed"; diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 5caf8d9fc8f2d..c3b05bec32fda 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -22,6 +22,7 @@ #include "arrow/matlab/array/proxy/time32_array.h" #include "arrow/matlab/array/proxy/time64_array.h" #include "arrow/matlab/array/proxy/struct_array.h" +#include "arrow/matlab/array/proxy/list_array.h" #include "arrow/matlab/array/proxy/chunked_array.h" #include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/tabular/proxy/table.h" @@ -61,6 +62,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, REGISTER_PROXY(arrow.array.proxy.BooleanArray , arrow::matlab::array::proxy::BooleanArray); REGISTER_PROXY(arrow.array.proxy.StringArray , arrow::matlab::array::proxy::StringArray); REGISTER_PROXY(arrow.array.proxy.StructArray , arrow::matlab::array::proxy::StructArray); + REGISTER_PROXY(arrow.array.proxy.ListArray , arrow::matlab::array::proxy::ListArray); REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.array.proxy.Time32Array , arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.array.proxy.Time64Array , arrow::matlab::array::proxy::NumericArray); diff --git a/matlab/src/matlab/+arrow/+array/ListArray.m b/matlab/src/matlab/+arrow/+array/ListArray.m new file mode 100644 index 
0000000000000..f8fd934b7c448 --- /dev/null +++ b/matlab/src/matlab/+arrow/+array/ListArray.m @@ -0,0 +1,111 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef ListArray < arrow.array.Array + + properties (Hidden, GetAccess=public, SetAccess=private) + NullSubstitutionValue = missing; + end + + properties (Dependent, GetAccess=public, SetAccess=private) + Values + Offsets + end + + methods + + function obj = ListArray(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.array.proxy.ListArray")} + end + import arrow.internal.proxy.validate + obj@arrow.array.Array(proxy); + end + + function values = get.Values(obj) + valueStruct = obj.Proxy.getValues(); + traits = arrow.type.traits.traits(arrow.type.ID(valueStruct.TypeID)); + proxy = libmexclass.proxy.Proxy(Name=traits.ArrayProxyClassName, ID=valueStruct.ProxyID); + values = traits.ArrayConstructor(proxy); + end + + function offsets = get.Offsets(obj) + proxyID = obj.Proxy.getOffsets(); + proxy = libmexclass.proxy.Proxy(Name="arrow.array.proxy.Int32Array", ID=proxyID); + offsets = arrow.array.Int32Array(proxy); + end + + function matlabArray = toMATLAB(obj) + numElements = obj.NumElements; + matlabArray = cell(numElements, 1); + + values = 
toMATLAB(obj.Values); + % Add one to Offsets array because MATLAB + % uses 1-based indexing. + offsets = toMATLAB(obj.Offsets) + 1; + + startIndex = offsets(1); + for ii = 1:numElements + % Subtract 1 because ending offset value is exclusive. + endIndex = offsets(ii + 1) - 1; + matlabArray{ii} = values(startIndex:endIndex, :); + startIndex = endIndex + 1; + end + + hasInvalid = ~all(obj.Valid); + if hasInvalid + matlabArray(~obj.Valid) = {obj.NullSubstitutionValue}; + end + end + + end + + methods (Static) + + function array = fromArrays(offsets, values, opts) + arguments + offsets (1, 1) arrow.array.Int32Array + values (1, 1) arrow.array.Array + opts.Valid + end + + import arrow.internal.validate.parseValid + + if nargin < 2 + error("arrow:array:list:FromArraysValuesAndOffsets", ... + "Must supply both an offsets and values array to construct a ListArray.") + end + + % Offsets should contain one more element than the number of elements in the output ListArray. + numElements = offsets.NumElements - 1; + + validElements = parseValid(opts, numElements); + offsetsProxyID = offsets.Proxy.ID; + valuesProxyID = values.Proxy.ID; + + args = struct(... + OffsetsProxyID=offsetsProxyID, ... + ValuesProxyID=valuesProxyID, ... + Valid=validElements ... 
+ ); + + proxyName = "arrow.array.proxy.ListArray"; + proxy = arrow.internal.proxy.create(proxyName, args); + array = arrow.array.ListArray(proxy); + end + + end + +end diff --git a/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m b/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m index d3a751ca46731..ad2f026d64e20 100644 --- a/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m +++ b/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m @@ -70,6 +70,13 @@ stringArray = arrow.array(strings); arrowArrays{ii} = StructArray.fromArrays(timestampArray, stringArray); matlabData{ii} = table(dates, strings, VariableNames=["Field1", "Field2"]); + elseif name == "arrow.array.ListArray" + offsets = arrow.array(int32(0:opts.NumRows)); + numbers = randomNumbers("double", opts.NumRows); + matlabData{ii} = num2cell(numbers); + values = arrow.array(numbers); + listArray = ListArray.fromArrays(offsets, values); + arrowArrays{ii} = listArray; else error("arrow:test:SupportedArrayCase", ... 
"Missing if-branch for array class " + name); diff --git a/matlab/src/matlab/+arrow/+type/+traits/ListTraits.m b/matlab/src/matlab/+arrow/+type/+traits/ListTraits.m index c1c87790342ea..26a0c6d340603 100644 --- a/matlab/src/matlab/+arrow/+type/+traits/ListTraits.m +++ b/matlab/src/matlab/+arrow/+type/+traits/ListTraits.m @@ -16,15 +16,17 @@ classdef ListTraits < arrow.type.traits.TypeTraits properties (Constant) - ArrayConstructor = missing - ArrayClassName = missing - ArrayProxyClassName = missing + ArrayConstructor = @arrow.array.ListArray + ArrayClassName = "arrow.array.ListArray" + ArrayProxyClassName = "arrow.array.proxy.ListArray" ArrayStaticConstructor = missing TypeConstructor = @arrow.type.ListType TypeClassName = "arrow.type.ListType" TypeProxyClassName = "arrow.type.proxy.ListType" + % The cell function works differently than other + % "type construction functions" in MATLAB. MatlabConstructor = missing - MatlabClassName = missing + MatlabClassName = "cell" end end diff --git a/matlab/src/matlab/+arrow/array.m b/matlab/src/matlab/+arrow/array.m index e34eb8b3fcc6c..50221b4b955df 100644 --- a/matlab/src/matlab/+arrow/array.m +++ b/matlab/src/matlab/+arrow/array.m @@ -62,4 +62,4 @@ if ~ischar(data) data = convertCharsToStrings(data); end -end \ No newline at end of file +end diff --git a/matlab/test/arrow/array/tListArray.m b/matlab/test/arrow/array/tListArray.m new file mode 100644 index 0000000000000..1ebf66e2f0999 --- /dev/null +++ b/matlab/test/arrow/array/tListArray.m @@ -0,0 +1,165 @@ +%TLISTARRAY Tests for arrow.array.ListArray + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. 
You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef tListArray < matlab.unittest.TestCase + + properties (Constant) + Traits = arrow.type.traits.traits(arrow.type.ID.List) + end + + properties (TestParameter) + TestArrowArray + end + + methods (TestParameterDefinition, Static) + + function TestArrowArray = initializeTestArrowArray() + %% Empty (zero-element) list (List) + Type = arrow.list(arrow.float64()); + NumElements = int64(0); + Valid = logical.empty(0, 1); + Offsets = arrow.array(int32(0)); + Values = arrow.array([]); + ArrowArray = arrow.array.ListArray.fromArrays(Offsets, Values, Valid=Valid); + MatlabArray = {cell.empty(0, 1)}; + + TestArrowArray.EmptyList = struct( ... + ArrowArray=ArrowArray, ... + MatlabArray=MatlabArray, ... + Properties=struct(... + Type=Type, ... + NumElements=NumElements, ... + Valid=Valid, ... + Offsets=Offsets, ... + Values=Values ... + ) ... + ); + + %% List with NULLs (List) + Type = arrow.list(arrow.string()); + NumElements = int64(4); + Valid = [true, false, true, false]; + Offsets = arrow.array(int32([0, 1, 4, 6, 7])); + Values = arrow.array(["A", missing, "C", "D", "E", missing, "G"]); + ArrowArray = arrow.array.ListArray.fromArrays(Offsets, Values, Valid=Valid); + MatlabArray = {{"A"; missing; ["E"; missing]; missing}}; + + TestArrowArray.NullList = struct( ... + ArrowArray=ArrowArray, ... + MatlabArray=MatlabArray, ... + Properties=struct(... + Type=Type, ... + NumElements=NumElements, ... + Valid=Valid, ... + Offsets=Offsets, ... + Values=Values ... + ) ... 
+ ); + + %% Single-level list (List) + Type = arrow.list(arrow.float64()); + NumElements = int64(3); + Valid = true(1, NumElements); + Offsets = arrow.array(int32([0, 2, 5, 9])); + Values = arrow.array([1, 2, 3, 4, 5, 6, 7, 8, 9]); + ArrowArray = arrow.array.ListArray.fromArrays(Offsets, Values, Valid=Valid); + MatlabArray = {{[1; 2]; [3; 4; 5]; [6; 7; 8; 9]}}; + + TestArrowArray.SingleLevelList = struct( ... + ArrowArray=ArrowArray, ... + MatlabArray=MatlabArray, ... + Properties=struct(... + Type=Type, ... + NumElements=NumElements, ... + Valid=Valid, ... + Offsets=Offsets, ... + Values=Values ... + ) ... + ); + + %% Multi-level list (List>) + Type = arrow.list(arrow.list(arrow.float64())); + NumElements = int64(2); + Valid = true(1, NumElements); + Offsets = arrow.array(int32([0, 1, 3])); + Values = TestArrowArray.SingleLevelList.ArrowArray; + ArrowArray = arrow.array.ListArray.fromArrays(Offsets, Values, Valid=Valid); + MatlabArray = {{{[1; 2]}; {[3; 4; 5]; [6; 7; 8; 9]}}}; + + TestArrowArray.MultiLevelList = struct( ... + ArrowArray=ArrowArray, ... + MatlabArray=MatlabArray, ... + Properties=struct(... + Type=Type, ... + NumElements=NumElements, ... + Valid=Valid, ... + Offsets=Offsets, ... + Values=Values ... + ) ... + ); + end + + end + + methods (Test) + + function TestClass(testCase, TestArrowArray) + % Verify that the arrow.array.Array has the expected class. + testCase.verifyInstanceOf(TestArrowArray.ArrowArray, testCase.Traits.ArrayClassName); + end + + function TestProperties(testCase, TestArrowArray) + % Verify that all properties of the arrow.array.Array: + % + % 1. Return the expected value + % 2. Cannot be modified (i.e. are read-only). + % + properties = string(fieldnames(TestArrowArray.Properties)); + for ii = numel(properties) + property = properties(ii); + expected = TestArrowArray.Properties.(property); + actual = getfield(TestArrowArray.ArrowArray, property); + % Verify that the property returns the expected value. 
+ testCase.verifyEqual(actual, expected); + fcn = @() setfield(TestArrowArray.ArrowArray, property, "NewValue"); + % Verify that the property cannot be modified (i.e. that it + % is read-only). + testCase.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + end + + function TestToMatlab(testCase, TestArrowArray) + % Verify that the toMATLAB method returns the + % expected MATLAB array. + actual = TestArrowArray.ArrowArray.toMATLAB(); + expected = TestArrowArray.MatlabArray; + testCase.verifyEqual(actual, expected); + end + + function TestErrorIfEmptyOffsets(testCase) + % Verify that an arrow:array:ListArrayFromArraysFailed error + % is thrown if an empty Offsets array is provided to the + % arrow.array.ListArray.fromArrays. + offsets = arrow.array(int32.empty(0, 0)); + values = arrow.array([1, 2, 3]); + fcn = @() arrow.array.ListArray.fromArrays(offsets, values); + testCase.verifyError(fcn, "arrow:array:ListArrayFromArraysFailed"); + end + + end + +end diff --git a/matlab/test/arrow/type/traits/tListTraits.m b/matlab/test/arrow/type/traits/tListTraits.m index 93a8d8a0d10c5..444c977503123 100644 --- a/matlab/test/arrow/type/traits/tListTraits.m +++ b/matlab/test/arrow/type/traits/tListTraits.m @@ -17,15 +17,17 @@ properties TraitsConstructor = @arrow.type.traits.ListTraits - ArrayConstructor = missing - ArrayClassName = missing - ArrayProxyClassName = missing + ArrayConstructor = @arrow.array.ListArray + ArrayClassName = "arrow.array.ListArray" + ArrayProxyClassName = "arrow.array.proxy.ListArray" ArrayStaticConstructor = missing TypeConstructor = @arrow.type.ListType TypeClassName = "arrow.type.ListType" TypeProxyClassName = "arrow.type.proxy.ListType" + % The cell function works differently than other + % "type construction functions" in MATLAB. 
MatlabConstructor = missing - MatlabClassName = missing + MatlabClassName = "cell" end end \ No newline at end of file diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index c3940933679a5..cb746e08b1f8e 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -48,6 +48,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/time32_array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/time64_array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/struct_array.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/list_array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/chunked_array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/wrap.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc"