From 1b929bceb34bdc899c1a09795b6c05fc2eb1019c Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Sat, 6 May 2023 20:29:32 -0700 Subject: [PATCH 01/13] Work in progress --- csharp/src/Apache.Arrow/C/CArrowArray.cs | 84 ++++++ .../src/Apache.Arrow/C/CArrowArrayExporter.cs | 135 ++++++++++ .../src/Apache.Arrow/C/CArrowArrayImporter.cs | 249 ++++++++++++++++++ .../src/Apache.Arrow/C/CArrowArrayStream.cs | 73 +++++ .../C/CArrowArrayStreamImporter.cs | 135 ++++++++++ .../src/Apache.Arrow/Ipc/ArrowStreamReader.cs | 2 +- .../src/Apache.Arrow/Ipc/IArrowArrayStream.cs | 24 ++ .../Memory/ImportedMemoryOwner.cs | 122 +++++++++ .../CDataInterfaceDataTests.cs | 92 +++++++ .../CDataInterfacePythonTests.cs | 238 +++++++++++++++++ 10 files changed, 1153 insertions(+), 1 deletion(-) create mode 100644 csharp/src/Apache.Arrow/C/CArrowArray.cs create mode 100644 csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs create mode 100644 csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs create mode 100644 csharp/src/Apache.Arrow/C/CArrowArrayStream.cs create mode 100644 csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs create mode 100644 csharp/src/Apache.Arrow/Ipc/IArrowArrayStream.cs create mode 100644 csharp/src/Apache.Arrow/Memory/ImportedMemoryOwner.cs create mode 100644 csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs diff --git a/csharp/src/Apache.Arrow/C/CArrowArray.cs b/csharp/src/Apache.Arrow/C/CArrowArray.cs new file mode 100644 index 0000000000000..37ff3760b6593 --- /dev/null +++ b/csharp/src/Apache.Arrow/C/CArrowArray.cs @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +using System; +using System.Runtime.InteropServices; + +namespace Apache.Arrow.C +{ + /// + /// An Arrow C Data Interface Schema, which represents the data in an exported array or record batch. + /// + /// + /// This is used to export or to other languages. It matches + /// the layout of the ArrowArray struct described in https://github.com/apache/arrow/blob/main/cpp/src/arrow/c/abi.h. + /// + [StructLayout(LayoutKind.Sequential)] + public unsafe struct CArrowArray + { + public long length; + public long null_count; + public long offset; + public long n_buffers; + public long n_children; + public byte** buffers; + public CArrowArray** children; + public CArrowArray* dictionary; + public delegate* unmanaged[Stdcall] release; + public void* private_data; + + /// + /// Allocate and zero-initialize an unmanaged pointer of this type. + /// + /// + /// This pointer must later be freed by . + /// + public static CArrowArray* Create() + { + var ptr = (CArrowArray*)Marshal.AllocHGlobal(sizeof(CArrowArray)); + + ptr->length = 0; + ptr->n_buffers = 0; + ptr->offset = 0; + ptr->buffers = null; + ptr->n_children = 0; + ptr->children = null; + ptr->dictionary = null; + ptr->null_count = 0; + ptr->release = null; + ptr->private_data = null; + + return ptr; + } + + /// + /// Free a pointer that was allocated in . + /// + /// + /// Do not call this on a pointer that was allocated elsewhere. + /// + public static void Free(CArrowArray* schema) + { + if (schema->release != null) + { + // Call release if not already called. + schema->release(schema); + } + Marshal.FreeHGlobal((IntPtr)schema); + } + } +} diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs new file mode 100644 index 0000000000000..02e87fa254764 --- /dev/null +++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +using System; +using System.Runtime.InteropServices; + +namespace Apache.Arrow.C +{ + public static class CArrowArrayExporter + { + private unsafe delegate void ReleaseArrowArray(CArrowArray* nativeArray); + + /// + /// Export an to a . + /// + /// The array to export + /// An allocated but uninitialized CArrowArray pointer. + /// + /// + /// CArrowArray* exportPtr = CArrowArray.Create(); + /// CArrowArrayExporter.ExportArray(array, exportPtr); + /// foreign_import_function(exportPtr); + /// + /// + public static unsafe void ExportArray(IArrowArray array, CArrowArray* cArray) + { + if (array == null) + { + throw new ArgumentNullException(nameof(array)); + } + if (cArray == null) + { + throw new ArgumentNullException(nameof(cArray)); + } + if (cArray->release != null) + { + throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArray)); + } + + ConvertArray(array.Data, cArray); + } + + private unsafe static void ConvertArray(ArrayData array, CArrowArray* cArray) + { + // TODO: Figure out a sane memory management strategy + + cArray->length = array.Length; + cArray->offset = array.Offset; + cArray->null_count = array.NullCount; + cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(ReleaseArray); + cArray->private_data = FromDisposable(array); + + cArray->n_buffers = array.Buffers?.Length ?? 0; + cArray->buffers = null; + if (cArray->n_buffers > 0) + { + cArray->buffers = (byte**)Marshal.AllocCoTaskMem(array.Buffers.Length * IntPtr.Size); + for (int i = 0; i < array.Buffers.Length; i++) + { + cArray->buffers[i] = (byte*)array.Buffers[i].Memory.Pin().Pointer; + } + } + + cArray->n_children = array.Children?.Length ?? 0; + cArray->children = null; + if (cArray->n_children > 0) + { + cArray->children = (CArrowArray**)Marshal.AllocCoTaskMem(IntPtr.Size * array.Children.Length); + for (int i = 0; i < array.Children.Length; i++) + { + cArray->children[i] = CArrowArray.Create(); + ConvertArray(array.Children[i], cArray->children[i]); + } + } + + cArray->dictionary = null; + if (array.Dictionary != null) + { + cArray->dictionary = CArrowArray.Create(); + ConvertArray(array.Dictionary, cArray->dictionary); + } + } + + private unsafe static void ReleaseArray(CArrowArray* nativeArray) + { + if (nativeArray->n_children != 0) + { + for (int i = 0; i < nativeArray->n_children; i++) + { + ReleaseArray(nativeArray->children[i]); + Marshal.FreeCoTaskMem((IntPtr)nativeArray->children[i]); + } + Marshal.FreeCoTaskMem((IntPtr)nativeArray->children); + nativeArray->children = null; + nativeArray->n_children = 0; + } + + if (nativeArray->buffers != null) + { + Marshal.FreeCoTaskMem((IntPtr)nativeArray->buffers); + nativeArray->buffers = null; + } + + Dispose(&nativeArray->private_data); + nativeArray->release = null; + } + + private unsafe static void* FromDisposable(IDisposable disposable) + { + GCHandle gch = GCHandle.Alloc(disposable); + return (void*)GCHandle.ToIntPtr(gch); + } + + private unsafe static void Dispose(void** ptr) + { + GCHandle gch = GCHandle.FromIntPtr((IntPtr)(*ptr)); + ((IDisposable)gch.Target).Dispose(); + gch.Free(); + *ptr = null; + } + } +} diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs new file mode 100644 index 0000000000000..5939219db46e0 --- /dev/null +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -0,0 +1,249 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +using System; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.C +{ + public static class CArrowArrayImporter + { + /// + /// Import C pointer as an . + /// + /// + /// This will call the release callback once all of the buffers in the returned + /// IArrowArray are disposed. + /// + /// + /// Typically, you will allocate an uninitialized CArrowArray pointer, + /// pass that to external function, and then use this method to import + /// the result. + /// + /// + /// CArrowArray* importedPtr = CArrowArray.Create(); + /// foreign_export_function(importedPtr); + /// IArrowArray importedArray = CArrowArrayImporter.ImportArray(importedPtr); + /// + /// + public static unsafe IArrowArray ImportArray(CArrowArray* ptr, IArrowType type) + { + ImportedArrowArray importedArray = null; + try + { + importedArray = new ImportedArrowArray(ptr); + return importedArray.GetAsArray(type); + } + finally + { + importedArray?.Release(); + } + } + + /// + /// Import C pointer as a . + /// + /// + /// This will call the release callback once all of the buffers in the returned + /// RecordBatch are disposed. + /// + /// + /// Typically, you will allocate an uninitialized CArrowArray pointer, + /// pass that to external function, and then use this method to import + /// the result. + /// + /// + /// CArrowArray* importedPtr = CArrowArray.Create(); + /// foreign_export_function(importedPtr); + /// RecordBatch batch = CArrowArrayImporter.ImportRecordBatch(importedPtr, schema); + /// + /// + public static unsafe RecordBatch ImportRecordBatch(CArrowArray* ptr, Schema schema) + { + ImportedArrowArray importedArray = null; + try + { + importedArray = new ImportedArrowArray(ptr); + return importedArray.GetAsRecordBatch(schema); + } + finally + { + importedArray?.Release(); + } + } + + private sealed unsafe class ImportedArrowArray : ImportedMemoryOwner + { + private readonly CArrowArray* _cArray; + + public ImportedArrowArray(CArrowArray* cArray) + { + if (cArray == null) + { + throw new ArgumentNullException(nameof(cArray)); + } + _cArray = cArray; + if (_cArray->release == null) + { + throw new ArgumentException("Tried to import an array that has already been released.", nameof(cArray)); + } + } + + protected override void FinalRelease() + { + if (_cArray->release != null) + { + _cArray->release(_cArray); + } + } + + public IArrowArray GetAsArray(IArrowType type) + { + // TODO: Cleanup more deterministically when there's an exception + return ArrowArrayFactory.BuildArray(GetAsArrayData(_cArray, type)); + } + + public RecordBatch GetAsRecordBatch(Schema schema) + { + // TODO: Cleanup more deterministically when there's an exception + IArrowArray[] arrays = new IArrowArray[schema.FieldsList.Count]; + for (int i = 0; i < _cArray->n_children; i++) + { + arrays[i] = ArrowArrayFactory.BuildArray(GetAsArrayData(_cArray->children[i], schema.FieldsList[i].DataType)); + } + return new RecordBatch(schema, arrays, checked((int)_cArray->length)); + } + + private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) + { + ArrayData[] children = null; + if (cArray->n_children > 0) + { + children = new ArrayData[cArray->n_children]; + for (int i = 0; i < cArray->n_children; i++) + { + children[i] = GetAsArrayData(cArray->children[i], type); + } + } + + ArrowBuffer[] buffers = null; + switch (type.TypeId) + { + case ArrowTypeId.String: + case ArrowTypeId.Binary: + buffers = ImportByteArrayBuffers(cArray); + break; + case ArrowTypeId.List: + case ArrowTypeId.Struct: + case ArrowTypeId.Union: + case ArrowTypeId.Dictionary: + throw new NotSupportedException(); + case ArrowTypeId.Map: + throw new NotSupportedException(); + case ArrowTypeId.Null: // TODO + default: + if (type is FixedWidthType fixedWidthType) + { + buffers = ImportBuffers(cArray, fixedWidthType.BitWidth); + } + else + { + throw new InvalidOperationException(); + } + break; + } + + ArrayData dictionary = null; + if (cArray->dictionary != null) + { + dictionary = GetAsArrayData(cArray->dictionary, type); + } + + return new ArrayData( + type, + checked((int)cArray->length), + checked((int)cArray->null_count), + checked((int)cArray->offset), + buffers, + children, + dictionary); + } + + private ArrowBuffer[] ImportByteArrayBuffers(CArrowArray* cArray) + { + if (cArray->n_buffers != 3) + { + return null; + } + + // validity, offsets, data + int length = checked((int)cArray->length); + int offsetsLength = (length + 1) * 4; + int* offsets = (int*)cArray->buffers[1]; + int valuesLength = offsets[length]; + int validityLength = checked((int)BitUtility.RoundUpToMultipleOf8(length) / 8); + + ArrowBuffer[] buffers = new ArrowBuffer[3]; + buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength)); + buffers[2] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[2], 0, valuesLength)); + if (cArray->buffers[0] != null) + { + buffers[0] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[0], 0, validityLength)); + } + else + { + buffers[0] = ArrowBuffer.Empty; + } + + return buffers; + } + + private ArrowBuffer[] ImportBuffers(CArrowArray* cArray, int bitWidth) + { + if (cArray->n_buffers != 2) + { + return null; + } + + // validity, data + int length = checked((int)cArray->length); + int valuesLength; + if (bitWidth >= 8) + valuesLength = checked((int)(cArray->length * bitWidth / 8)); + else + valuesLength = checked((int)BitUtility.RoundUpToMultipleOf8(length) / 8); + + int validityLength = checked((int)BitUtility.RoundUpToMultipleOf8(length) / 8); + + ArrowBuffer[] buffers = new ArrowBuffer[2]; + if (cArray->buffers[0] != null) + { + buffers[0] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, validityLength)); + } + else + { + buffers[0] = ArrowBuffer.Empty; + } + + buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, valuesLength)); + + return buffers; + } + } + } +} diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs new file mode 100644 index 0000000000000..216b44d7a79ff --- /dev/null +++ b/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Runtime.InteropServices; +using Apache.Arrow.Ipc; + +namespace Apache.Arrow.C +{ + /// + /// An Arrow C Data Interface ArrowArrayStream, which represents a stream of record batches. + /// + /// + /// This is used to export to other languages. It matches the layout of the + /// ArrowArrayStream struct described in https://github.com/apache/arrow/blob/main/cpp/src/arrow/c/abi.h. + /// + [StructLayout(LayoutKind.Sequential)] + public unsafe struct CArrowArrayStream + { + public delegate* unmanaged[Stdcall] get_schema; + public delegate* unmanaged[Stdcall] get_next; + public delegate* unmanaged[Stdcall] get_last_error; + public delegate* unmanaged[Stdcall] release; + public void* private_data; + + /// + /// Allocate and zero-initialize an unmanaged pointer of this type. + /// + /// + /// This pointer must later be freed by . + /// + public static CArrowArrayStream* Create() + { + var ptr = (CArrowArrayStream*)Marshal.AllocHGlobal(sizeof(CArrowArrayStream)); + + ptr->get_schema = null; + ptr->get_next = null; + ptr->get_last_error = null; + ptr->release = null; + ptr->private_data = null; + + return ptr; + } + + /// + /// Free a pointer that was allocated in . + /// + /// + /// Do not call this on a pointer that was allocated elsewhere. + /// + public static void Free(CArrowSchema* schema) + { + if (schema->release != null) + { + // Call release if not already called. + schema->release(schema); + } + Marshal.FreeHGlobal((IntPtr)schema); + } + } +} diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs new file mode 100644 index 0000000000000..1bfa90ac5c9e4 --- /dev/null +++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +using System; +using System.Threading; +using System.Threading.Tasks; +using Apache.Arrow.Ipc; + +namespace Apache.Arrow.C +{ + public static class CArrowArrayStreamImporter + { + /// + /// Import C pointer as an . + /// + /// + /// This will call the release callback on the passed struct if the function fails. + /// Otherwise, the release callback is called when the IArrowArrayStream is disposed. + /// + /// + /// Typically, you will allocate an uninitialized CArrowArrayStream pointer, + /// pass that to external function, and then use this method to import + /// the result. + /// + /// + /// CArrowArrayStream* importedPtr = CArrowArrayStream.Create(); + /// foreign_export_function(importedPtr); + /// IArrowArrayStream importedStream = CArrowArrayStreamImporter.ImportStream(importedPtr); + /// + /// + public static unsafe IArrowArrayStream ImportArrayStream(CArrowArrayStream* ptr) + { + return new ImportedArrowArrayStream(ptr); + } + + private sealed unsafe class ImportedArrowArrayStream : IArrowArrayStream + { + private readonly CArrowArrayStream* _cArrayStream; + private readonly Schema _schema; + private bool _disposed; + + public ImportedArrowArrayStream(CArrowArrayStream* cArrayStream) + { + if (cArrayStream == null) + { + throw new ArgumentNullException(nameof(cArrayStream)); + } + _cArrayStream = cArrayStream; + if (_cArrayStream->release == null) + { + throw new ArgumentException("Tried to import an array stream that has already been released.", nameof(cArrayStream)); + } + + CArrowSchema* cSchema = CArrowSchema.Create(); + try + { + if (_cArrayStream->get_schema(_cArrayStream, cSchema) != 0) + { + throw new Exception("This needs to be better"); + } + _schema = CArrowSchemaImporter.ImportSchema(cSchema); + } + finally + { + if (_schema == null) + { + CArrowSchema.Free(cSchema); + } + } + } + + ~ImportedArrowArrayStream() + { + Dispose(); + } + + public Schema Schema => _schema; + + public ValueTask ReadNextRecordBatchAsync(CancellationToken cancellationToken = default) + { + if (_disposed) + { + throw new ObjectDisposedException(typeof(ImportedArrowArrayStream).Name); + } + + RecordBatch result = null; + CArrowArray* cArray = CArrowArray.Create(); + try + { + if (_cArrayStream->get_next(_cArrayStream, cArray) != 0) + { + throw new Exception("This too needs to be better"); + } + if (cArray->release != null) + { + result = CArrowArrayImporter.ImportRecordBatch(cArray, _schema); + } + } + finally + { + if (result == null) + { + CArrowArray.Free(cArray); + } + } + + return new ValueTask(result); + } + + public void Dispose() + { + if (!_disposed && _cArrayStream->release != null) + { + _disposed = true; + _cArrayStream->release(_cArrayStream); + } + GC.SuppressFinalize(this); + } + } + } +} diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs index ca13a3c279384..cdcfe7875da22 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs @@ -24,7 +24,7 @@ namespace Apache.Arrow.Ipc /// /// Represents a reader that can read Arrow streams. /// - public class ArrowStreamReader : IArrowReader, IDisposable + public class ArrowStreamReader : IArrowReader, IArrowArrayStream, IDisposable { private protected readonly ArrowReaderImplementation _implementation; diff --git a/csharp/src/Apache.Arrow/Ipc/IArrowArrayStream.cs b/csharp/src/Apache.Arrow/Ipc/IArrowArrayStream.cs new file mode 100644 index 0000000000000..8dae61d490e5f --- /dev/null +++ b/csharp/src/Apache.Arrow/Ipc/IArrowArrayStream.cs @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; + +namespace Apache.Arrow.Ipc +{ + public interface IArrowArrayStream : IArrowReader, IDisposable + { + Schema Schema { get; } + } +} diff --git a/csharp/src/Apache.Arrow/Memory/ImportedMemoryOwner.cs b/csharp/src/Apache.Arrow/Memory/ImportedMemoryOwner.cs new file mode 100644 index 0000000000000..55fd1ddfbb2ae --- /dev/null +++ b/csharp/src/Apache.Arrow/Memory/ImportedMemoryOwner.cs @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Buffers; +using System.Runtime.CompilerServices; +using System.Threading; + +namespace Apache.Arrow.Memory +{ + internal abstract class ImportedMemoryOwner + { + long _referenceCount; + long _managedMemory; + + protected ImportedMemoryOwner() + { + _referenceCount = 1; + } + + public IMemoryOwner AddMemory(IntPtr ptr, int offset, int length) + { + if (_referenceCount <= 0) + { + throw new ObjectDisposedException(typeof(ImportedMemoryManager).Name); + } + + IMemoryOwner memory = new ImportedMemoryManager(this, ptr, offset, length); + Interlocked.Increment(ref _referenceCount); + Interlocked.Add(ref _managedMemory, length); + if (length > 0) + { + GC.AddMemoryPressure(length); + } + return memory; + } + + public void Release() + { + if (Interlocked.Decrement(ref _referenceCount) == 0) + { + if (_managedMemory > 0) + { + GC.RemoveMemoryPressure(_managedMemory); + } + FinalRelease(); + } + } + + protected abstract void FinalRelease(); + + sealed private class ImportedMemoryManager : MemoryManager + { + private ImportedMemoryOwner _owner; + private IntPtr _ptr; + private readonly int _offset; + private readonly int _length; + + public ImportedMemoryManager(ImportedMemoryOwner owner, IntPtr ptr, int offset, int length) + { + _owner = owner; + _ptr = ptr; + _offset = offset; + _length = length; + } + + ~ImportedMemoryManager() + { + Dispose(false); + } + + public override unsafe Span GetSpan() + { + void* ptr = CalculatePointer(0); + return new Span(ptr, _length); + } + + public override unsafe MemoryHandle Pin(int elementIndex = 0) + { + // NOTE: Imported memory doesn't require GC pinning because by definition it's not + // managed by the garbage collector. + + void* ptr = CalculatePointer(elementIndex); + return new MemoryHandle(ptr, default, this); + } + + public override void Unpin() + { + // SEE: Pin implementation + return; + } + + protected override void Dispose(bool disposing) + { + // Only free once. + + ImportedMemoryOwner owner = Interlocked.Exchange(ref _owner, null); + if (owner != null) + { + _ptr = IntPtr.Zero; + owner.Release(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private unsafe void* CalculatePointer(int index) => + (_ptr + _offset + index).ToPointer(); + } + } +} diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs new file mode 100644 index 0000000000000..3a8a20bc8375b --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Runtime.InteropServices; +using Apache.Arrow.C; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class CDataInterfaceDataTests + { + private IArrowArray GetTestArray() + { + var builder = new StringArray.Builder(); + builder.Append("hello"); + builder.Append("world"); + builder.AppendNull(); + builder.Append("foo"); + builder.Append("bar"); + return builder.Build(); + } + + [Fact] + public unsafe void InitializeArrayZeroed() + { + CArrowArray* cArray = CArrowArray.Create(); + + Assert.Equal(0, cArray->length); + Assert.Equal(0, cArray->null_count); + Assert.Equal(0, cArray->offset); + Assert.Equal(0, cArray->n_buffers); + Assert.Equal(0, cArray->n_children); + Assert.True(cArray->buffers == null); + Assert.True(cArray->children == null); + Assert.True(cArray->dictionary == null); + Assert.True(cArray->release == null); + Assert.True(cArray->private_data == null); + + CArrowArray.Free(cArray); + } + + [Fact] + public unsafe void CallsReleaseForValid() + { + IArrowArray array = GetTestArray(); + CArrowArray* cArray = CArrowArray.Create(); + CArrowArrayExporter.ExportArray(array, cArray); + Assert.False(cArray->release == null); + CArrowArrayImporter.ImportArray(cArray, array.Data.DataType).Dispose(); + Assert.True(cArray->release == null); + CArrowArray.Free(cArray); + } + + [Fact] + public unsafe void CallsReleaseForInvalid() + { + // Make sure we call release callback, even if the imported schema + // is invalid. + CArrowArray* cArray = CArrowArray.Create(); + + bool wasCalled = false; + var releaseCallback = (CArrowArray* cArray) => + { + wasCalled = true; + cArray->release = null; + }; + cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate( + releaseCallback); + + Assert.Throws(() => + { + CArrowArrayImporter.ImportArray(cArray, GetTestArray().Data.DataType); + }); + Assert.True(wasCalled); + CArrowArray.Free(cArray); + } + } +} diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 750877cc8da16..1fe1121f45bb8 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -19,6 +19,7 @@ using System.Linq; using System.Runtime.InteropServices; using Apache.Arrow.C; +using Apache.Arrow.Ipc; using Apache.Arrow.Types; using Python.Runtime; using Xunit; @@ -166,6 +167,63 @@ private static dynamic GetPythonSchema() } } + private IArrowArray GetTestArray() + { + var builder = new StringArray.Builder(); + builder.Append("hello"); + builder.Append("world"); + builder.AppendNull(); + builder.Append("foo"); + builder.Append("bar"); + return builder.Build(); + } + + private dynamic GetPythonArray() + { + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + return pa.array(new[] { "hello", "world", null, "foo", "bar" }); + } + } + + private RecordBatch GetTestRecordBatch() + { + Field[] fields = new[] + { + new Field("col1", Int64Type.Default, true), + new Field("col2", StringType.Default, true), + new Field("col3", DoubleType.Default, false), + }; + return new RecordBatch( + new Schema(fields, null), + new IArrowArray[] + { + new Int64Array.Builder().AppendRange(new long[] { 1, 2, 3 }).AppendNull().Append(5).Build(), + GetTestArray(), + new DoubleArray.Builder().AppendRange(new double[] { 0.0, 1.4, 2.5, 3.6, 4.7 }).Build(), + }, + 5); + } + + private dynamic GetPythonRecordBatch() + { + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + dynamic table = pa.table( + new PyList(new PyObject[] + { + pa.array(new long?[] { 1, 2, 3, null, 5 }), + pa.array(new[] { "hello", "world", null, "foo", "bar" }), + pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }) + }), + new[] { "col1", "col2", "col3" }); + + return table.to_batches()[0]; + } + } + // Schemas created in Python, used in CSharp [SkippableFact] public unsafe void ImportType() @@ -343,5 +401,185 @@ public unsafe void ExportSchema() // Since we allocated, we are responsible for freeing the pointer. CArrowSchema.Free(cSchema); } + + [SkippableFact] + public unsafe void ImportArray() + { + CArrowArray* cArray = CArrowArray.Create(); + CArrowSchema* cSchema = CArrowSchema.Create(); + + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + dynamic array = pa.array(new[] { "hello", "world", null, "foo", "bar" }); + + long arrayPtr = ((IntPtr)cArray).ToInt64(); + long schemaPtr = ((IntPtr)cSchema).ToInt64(); + array._export_to_c(arrayPtr, schemaPtr); + } + + ArrowType type = CArrowSchemaImporter.ImportType(cSchema); + IArrowArray importedArray = CArrowArrayImporter.ImportArray(cArray, type); + + Assert.Equal(5, importedArray.Length); + } + + [SkippableFact] + public unsafe void ImportRecordBatch() + { + CArrowArray* cArray = CArrowArray.Create(); + CArrowSchema* cSchema = CArrowSchema.Create(); + + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + dynamic table = pa.table( + new PyList(new PyObject[] + { + pa.array(new long?[] { 1, 2, 3, null, 5 }), + pa.array(new[] { "hello", "world", null, "foo", "bar" }), + pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }) + }), + new[] { "col1", "col2", "col3" }); + + dynamic batch = table.to_batches()[0]; + + long batchPtr = ((IntPtr)cArray).ToInt64(); + long schemaPtr = ((IntPtr)cSchema).ToInt64(); + batch._export_to_c(batchPtr, schemaPtr); + } + + Schema schema = CArrowSchemaImporter.ImportSchema(cSchema); + RecordBatch recordBatch = CArrowArrayImporter.ImportRecordBatch(cArray, schema); + + Assert.Equal(5, recordBatch.Length); + } + + [SkippableFact] + public unsafe void ImportRecordBatchReader() + { + CArrowArray* cArray = CArrowArray.Create(); + CArrowSchema* cSchema = CArrowSchema.Create(); + + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + dynamic table = pa.table( + new PyList(new PyObject[] + { + pa.array(new long?[] { 1, 2, 3, null, 5 }), + pa.array(new[] { "hello", "world", null, "foo", "bar" }), + pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }) + }), + new[] { "col1", "col2", "col3" }); + + dynamic batch = table.to_batches()[0]; + + long batchPtr = ((IntPtr)cArray).ToInt64(); + long schemaPtr = ((IntPtr)cSchema).ToInt64(); + batch._export_to_c(batchPtr, schemaPtr); + } + + Schema schema = CArrowSchemaImporter.ImportSchema(cSchema); + RecordBatch recordBatch = CArrowArrayImporter.ImportRecordBatch(cArray, schema); + + Assert.Equal(5, recordBatch.Length); + } + + [SkippableFact] + public unsafe void ImportArrayStream() + { + CArrowArrayStream* cArrayStream = CArrowArrayStream.Create(); + + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + dynamic table = pa.table( + new PyList(new PyObject[] + { + pa.array(new long?[] { 1, 2, 3, null, 5 }), + pa.array(new[] { "hello", "world", null, "foo", "bar" }), + pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }) + }), + new[] { "col1", "col2", "col3" }); + + dynamic batchReader = pa.RecordBatchReader.from_batches(table.schema, table.to_batches()); + + long streamPtr = ((IntPtr)cArrayStream).ToInt64(); + batchReader._export_to_c(streamPtr); + } + + IArrowArrayStream stream = CArrowArrayStreamImporter.ImportArrayStream(cArrayStream); + var batch1 = stream.ReadNextRecordBatchAsync().Result; + Assert.Equal(5, batch1.Length); + + var batch2 = stream.ReadNextRecordBatchAsync().Result; + Assert.Null(batch2); + } + + [SkippableFact] + public unsafe void ExportArray() + { + IArrowArray array = GetTestArray(); + dynamic pyArray = GetPythonArray(); + + CArrowArray* cArray = CArrowArray.Create(); + CArrowArrayExporter.ExportArray(array, cArray); + + CArrowSchema* cSchema = CArrowSchema.Create(); + CArrowSchemaExporter.ExportType(array.Data.DataType, cSchema); + + // For Python, we need to provide the pointers + long arrayPtr = ((IntPtr)cArray).ToInt64(); + long schemaPtr = ((IntPtr)cSchema).ToInt64(); + + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + dynamic exportedPyArray = pa.Array._import_from_c(arrayPtr, schemaPtr); + Assert.True(exportedPyArray == pyArray); + } + + // Since we allocated, we are responsible for freeing the pointer. + CArrowArray.Free(cArray); + } + + [SkippableFact] + public unsafe void ExportBatch() + { + RecordBatch batch = GetTestRecordBatch(); + dynamic pyBatch = GetPythonRecordBatch(); + + CArrowArray* cArray = CArrowArray.Create(); + CArrowSchema* cSchema = CArrowSchema.Create(); + +#if TODO + CArrowArrayExporter.ExportRecordBatch(batch, cArray); + + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + dynamic table = pa.table( + new PyList(new PyObject[] + { + pa.array(new long?[] { 1, 2, 3, null, 5 }), + pa.array(new[] { "hello", "world", null, "foo", "bar" }), + pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }) + }), + new[] { "col1", "col2", "col3" }); + + dynamic batch = table.to_batches()[0]; + + long batchPtr = ((IntPtr)cArray).ToInt64(); + long schemaPtr = ((IntPtr)cSchema).ToInt64(); + batch._export_to_c(batchPtr, schemaPtr); + } + + Schema schema = CArrowSchemaImporter.ImportSchema(cSchema); + RecordBatch recordBatch = CArrowArrayImporter.ImportRecordBatch(cArray, schema); + + Assert.Equal(5, recordBatch.Length); +#endif + } } } From 5f3013c4dec4373d6e73b80c831fcc7cc41ee463 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Sun, 7 May 2023 13:13:34 -0700 Subject: [PATCH 02/13] Mostly working, but without correct memory management or a full complement of supported types. --- csharp/src/Apache.Arrow/ArrowBuffer.cs | 9 + .../src/Apache.Arrow/C/CArrowArrayExporter.cs | 102 +++++++++--- .../src/Apache.Arrow/C/CArrowArrayStream.cs | 8 +- .../C/CArrowArrayStreamExporter.cs | 120 ++++++++++++++ .../Memory/INativeAllocationOwner.cs | 24 +++ csharp/src/Apache.Arrow/Memory/IShareable.cs | 22 +++ .../Memory/NativeMemoryAllocator.cs | 11 ++ .../Memory/NativeMemoryManager.cs | 43 +++-- .../Memory/SharedNativeAllocationOwner.cs | 29 ++++ .../CDataInterfacePythonTests.cs | 156 ++++++++++++------ 10 files changed, 440 insertions(+), 84 deletions(-) create mode 100644 csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs create mode 100644 csharp/src/Apache.Arrow/Memory/INativeAllocationOwner.cs create mode 100644 csharp/src/Apache.Arrow/Memory/IShareable.cs create mode 100644 csharp/src/Apache.Arrow/Memory/SharedNativeAllocationOwner.cs diff --git a/csharp/src/Apache.Arrow/ArrowBuffer.cs b/csharp/src/Apache.Arrow/ArrowBuffer.cs index f8e675921684d..653c9c2d42634 100644 --- a/csharp/src/Apache.Arrow/ArrowBuffer.cs +++ b/csharp/src/Apache.Arrow/ArrowBuffer.cs @@ -72,5 +72,14 @@ public void Dispose() { _memoryOwner?.Dispose(); } + + internal bool TryShare(SharedNativeAllocationOwner newOwner) + { + if (_memoryOwner == null && IsEmpty) + { + return true; + } + return _memoryOwner is IShareable shareableMemory && shareableMemory.TryShare(newOwner); + } } } diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs index 02e87fa254764..3d5eedd5e1db0 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs @@ -16,12 +16,13 @@ using System; using System.Runtime.InteropServices; +using Apache.Arrow.Memory; namespace Apache.Arrow.C { public static class CArrowArrayExporter { - private unsafe delegate void ReleaseArrowArray(CArrowArray* nativeArray); + private unsafe delegate void ReleaseArrowArray(CArrowArray* cArray); /// /// Export an to a . @@ -50,13 +51,41 @@ public static unsafe void ExportArray(IArrowArray array, CArrowArray* cArray) throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArray)); } - ConvertArray(array.Data, cArray); + ConvertArray(new SharedNativeAllocationOwner(), array.Data, cArray); } - private unsafe static void ConvertArray(ArrayData array, CArrowArray* cArray) + /// + /// Export a to a . + /// + /// The record batch to export + /// An allocated but uninitialized CArrowArray pointer. + /// + /// + /// CArrowArray* exportPtr = CArrowArray.Create(); + /// CArrowArrayExporter.ExportRecordBatch(batch, exportPtr); + /// foreign_import_function(exportPtr); + /// + /// + public static unsafe void ExportRecordBatch(RecordBatch batch, CArrowArray* cArray) { - // TODO: Figure out a sane memory management strategy + if (batch == null) + { + throw new ArgumentNullException(nameof(batch)); + } + if (cArray == null) + { + throw new ArgumentNullException(nameof(cArray)); + } + if (cArray->release != null) + { + throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArray)); + } + ConvertRecordBatch(new SharedNativeAllocationOwner(), batch, cArray); + } + + private unsafe static void ConvertArray(SharedNativeAllocationOwner sharedOwner, ArrayData array, CArrowArray* cArray) + { cArray->length = array.Length; cArray->offset = array.Offset; cArray->null_count = array.NullCount; @@ -70,7 +99,12 @@ private unsafe static void ConvertArray(ArrayData array, CArrowArray* cArray) cArray->buffers = (byte**)Marshal.AllocCoTaskMem(array.Buffers.Length * IntPtr.Size); for (int i = 0; i < array.Buffers.Length; i++) { - cArray->buffers[i] = (byte*)array.Buffers[i].Memory.Pin().Pointer; + ArrowBuffer buffer = array.Buffers[i]; + if (!buffer.TryShare(sharedOwner)) + { + throw new NotSupportedException(); // TODO + } + cArray->buffers[i] = (byte*)buffer.Memory.Pin().Pointer; } } @@ -82,7 +116,7 @@ private unsafe static void ConvertArray(ArrayData array, CArrowArray* cArray) for (int i = 0; i < array.Children.Length; i++) { cArray->children[i] = CArrowArray.Create(); - ConvertArray(array.Children[i], cArray->children[i]); + ConvertArray(sharedOwner, array.Children[i], cArray->children[i]); } } @@ -90,32 +124,60 @@ private unsafe static void ConvertArray(ArrayData array, CArrowArray* cArray) if (array.Dictionary != null) { cArray->dictionary = CArrowArray.Create(); - ConvertArray(array.Dictionary, cArray->dictionary); + ConvertArray(sharedOwner, array.Dictionary, cArray->dictionary); + } + } + + private unsafe static void ConvertRecordBatch(SharedNativeAllocationOwner sharedOwner, RecordBatch batch, CArrowArray* cArray) + { + cArray->length = batch.Length; + cArray->offset = 0; + cArray->null_count = 0; + cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(ReleaseArray); + cArray->private_data = FromDisposable(batch); + + cArray->n_buffers = 1; + cArray->buffers = (byte**)Marshal.AllocCoTaskMem(IntPtr.Size); + + cArray->n_children = batch.ColumnCount; + cArray->children = null; + if (cArray->n_children > 0) + { + cArray->children = (CArrowArray**)Marshal.AllocCoTaskMem(IntPtr.Size * batch.ColumnCount); + int i = 0; + foreach (IArrowArray child in batch.Arrays) + { + cArray->children[i] = CArrowArray.Create(); + ConvertArray(sharedOwner, child.Data, cArray->children[i]); + i++; + } } + + cArray->dictionary = null; } - private unsafe static void ReleaseArray(CArrowArray* nativeArray) + private unsafe static void ReleaseArray(CArrowArray* cArray) { - if (nativeArray->n_children != 0) + if (cArray->n_children != 0) { - for (int i = 0; i < nativeArray->n_children; i++) + for (int i = 0; i < cArray->n_children; i++) { - ReleaseArray(nativeArray->children[i]); - Marshal.FreeCoTaskMem((IntPtr)nativeArray->children[i]); + ReleaseArray(cArray->children[i]); + Marshal.FreeCoTaskMem((IntPtr)cArray->children[i]); } - Marshal.FreeCoTaskMem((IntPtr)nativeArray->children); - nativeArray->children = null; - nativeArray->n_children = 0; + Marshal.FreeCoTaskMem((IntPtr)cArray->children); + cArray->children = null; + cArray->n_children = 0; } - if (nativeArray->buffers != null) + if (cArray->buffers != null) { - Marshal.FreeCoTaskMem((IntPtr)nativeArray->buffers); - nativeArray->buffers = null; + Marshal.FreeCoTaskMem((IntPtr)cArray->buffers); + cArray->buffers = null; } - Dispose(&nativeArray->private_data); - nativeArray->release = null; + Dispose(&cArray->private_data); + cArray->release = null; } private unsafe static void* FromDisposable(IDisposable disposable) diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs index 216b44d7a79ff..1430ba6cb51b7 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs @@ -60,14 +60,14 @@ public unsafe struct CArrowArrayStream /// /// Do not call this on a pointer that was allocated elsewhere. /// - public static void Free(CArrowSchema* schema) + public static void Free(CArrowArrayStream* arrayStream) { - if (schema->release != null) + if (arrayStream->release != null) { // Call release if not already called. - schema->release(schema); + arrayStream->release(arrayStream); } - Marshal.FreeHGlobal((IntPtr)schema); + Marshal.FreeHGlobal((IntPtr)arrayStream); } } } diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs new file mode 100644 index 0000000000000..a538525ff527f --- /dev/null +++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +using System; +using System.Runtime.InteropServices; +using Apache.Arrow.Memory; +using Apache.Arrow.Ipc; + +namespace Apache.Arrow.C +{ + public static class CArrowArrayStreamExporter + { + private unsafe delegate int GetSchemaArrayStream(CArrowArrayStream* cArrayStream, CArrowSchema* cSchema); + private unsafe delegate int GetNextArrayStream(CArrowArrayStream* cArrayStream, CArrowArray* cArray); + private unsafe delegate byte* GetLastErrorArrayStream(CArrowArrayStream* cArrayStream); + private unsafe delegate void ReleaseArrayStream(CArrowArrayStream* cArrayStream); + + /// + /// Export an to a . + /// + /// The array stream to export + /// An allocated but uninitialized CArrowArrayStream pointer. + /// + /// + /// CArrowArrayStream* exportPtr = CArrowArrayStream.Create(); + /// CArrowArrayStreamExporter.ExportArray(arrayStream, exportPtr); + /// foreign_import_function(exportPtr); + /// + /// + public static unsafe void ExportArrayStream(IArrowArrayStream arrayStream, CArrowArrayStream* cArrayStream) + { + if (arrayStream == null) + { + throw new ArgumentNullException(nameof(arrayStream)); + } + if (arrayStream == null) + { + throw new ArgumentNullException(nameof(arrayStream)); + } + if (cArrayStream->release != null) + { + throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArrayStream)); + } + + cArrayStream->private_data = FromDisposable(arrayStream); + cArrayStream->get_schema = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(GetSchema); + cArrayStream->get_next = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(GetNext); + cArrayStream->get_last_error = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(GetLastError); + cArrayStream->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(Release); + } + + private unsafe static int GetSchema(CArrowArrayStream* cArrayStream, CArrowSchema* cSchema) + { + try + { + IArrowArrayStream arrayStream = FromPointer(cArrayStream->private_data); + CArrowSchemaExporter.ExportSchema(arrayStream.Schema, cSchema); + return 0; + } + catch (Exception) + { + return 1; + } + } + + private unsafe static int GetNext(CArrowArrayStream* cArrayStream, CArrowArray* cArray) + { + try + { + cArray->release = null; + IArrowArrayStream arrayStream = FromPointer(cArrayStream->private_data); + RecordBatch recordBatch = arrayStream.ReadNextRecordBatchAsync().Result; + if (recordBatch != null) + { + CArrowArrayExporter.ExportRecordBatch(recordBatch, cArray); + } + return 0; + } + catch (Exception) + { + return 1; + } + } + + private unsafe static byte* GetLastError(CArrowArrayStream* cArrayStream) + { + return null; + } + + private unsafe static void Release(CArrowArrayStream* cArrayStream) + { + FromPointer(cArrayStream->private_data).Dispose(); + } + + private unsafe static void* FromDisposable(IDisposable disposable) + { + GCHandle gch = GCHandle.Alloc(disposable); + return (void*)GCHandle.ToIntPtr(gch); + } + + private unsafe static IArrowArrayStream FromPointer(void* ptr) + { + GCHandle gch = GCHandle.FromIntPtr((IntPtr)ptr); + return (IArrowArrayStream)gch.Target; + } + } +} diff --git a/csharp/src/Apache.Arrow/Memory/INativeAllocationOwner.cs b/csharp/src/Apache.Arrow/Memory/INativeAllocationOwner.cs new file mode 100644 index 0000000000000..a1a73ebf178dd --- /dev/null +++ b/csharp/src/Apache.Arrow/Memory/INativeAllocationOwner.cs @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; + +namespace Apache.Arrow.Memory +{ + internal interface INativeAllocationOwner + { + void Release(IntPtr ptr, int offset, int length); + } +} diff --git a/csharp/src/Apache.Arrow/Memory/IShareable.cs b/csharp/src/Apache.Arrow/Memory/IShareable.cs new file mode 100644 index 0000000000000..6a6d5028365fe --- /dev/null +++ b/csharp/src/Apache.Arrow/Memory/IShareable.cs @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace Apache.Arrow.Memory +{ + internal interface IShareable + { + bool TryShare(SharedNativeAllocationOwner newOwner); + } +} diff --git a/csharp/src/Apache.Arrow/Memory/NativeMemoryAllocator.cs b/csharp/src/Apache.Arrow/Memory/NativeMemoryAllocator.cs index 69a0467471722..57f85511b2bdc 100644 --- a/csharp/src/Apache.Arrow/Memory/NativeMemoryAllocator.cs +++ b/csharp/src/Apache.Arrow/Memory/NativeMemoryAllocator.cs @@ -21,6 +21,8 @@ namespace Apache.Arrow.Memory { public class NativeMemoryAllocator : MemoryAllocator { + internal static readonly INativeAllocationOwner ExclusiveOwner = new NativeAllocationOwner(); + public NativeMemoryAllocator(int alignment = DefaultAlignment) : base(alignment) { } @@ -47,5 +49,14 @@ protected override IMemoryOwner AllocateInternal(int length, out int bytes return manager; } + + private sealed class NativeAllocationOwner : INativeAllocationOwner + { + public void Release(IntPtr ptr, int offset, int length) + { + Marshal.FreeHGlobal(ptr); + GC.RemoveMemoryPressure(length); + } + } } } diff --git a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs index 00eb7dc168c2c..9a6be27f05159 100644 --- a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs +++ b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs @@ -15,24 +15,32 @@ using System; using System.Buffers; -using System.Diagnostics; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Threading; namespace Apache.Arrow.Memory { - public class NativeMemoryManager: MemoryManager + public class NativeMemoryManager : MemoryManager, IShareable { private IntPtr _ptr; private readonly int _offset; private readonly int _length; + private INativeAllocationOwner _owner; public NativeMemoryManager(IntPtr ptr, int offset, int length) { _ptr = ptr; _offset = offset; _length = length; + _owner = NativeMemoryAllocator.ExclusiveOwner; + } + + internal NativeMemoryManager(INativeAllocationOwner owner, IntPtr ptr, int offset, int length) + { + _ptr = ptr; + _offset = offset; + _length = length; + _owner = owner; } ~NativeMemoryManager() @@ -64,20 +72,35 @@ public override void Unpin() protected override void Dispose(bool disposing) { // Only free once. - - lock (this) + INativeAllocationOwner owner = Interlocked.Exchange(ref _owner, null); + if (_owner != null) { - if (_ptr != IntPtr.Zero) + IntPtr ptr = Interlocked.Exchange(ref _ptr, IntPtr.Zero); + if (ptr != IntPtr.Zero) { - Marshal.FreeHGlobal(_ptr); - Interlocked.Exchange(ref _ptr, IntPtr.Zero); - GC.RemoveMemoryPressure(_length); + owner.Release(ptr, _offset, _length); } } } + bool IShareable.TryShare(SharedNativeAllocationOwner newOwner) + { + INativeAllocationOwner oldOwner = Interlocked.Exchange(ref _owner, newOwner); + if (oldOwner == null) + { + _owner = null; + return false; + } + + if (oldOwner is SharedNativeAllocationOwner sharedOwner) + { + } + + return true; + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] - private unsafe void* CalculatePointer(int index) => + private unsafe void* CalculatePointer(int index) => (_ptr + _offset + index).ToPointer(); } } diff --git a/csharp/src/Apache.Arrow/Memory/SharedNativeAllocationOwner.cs b/csharp/src/Apache.Arrow/Memory/SharedNativeAllocationOwner.cs new file mode 100644 index 0000000000000..47e26171624e0 --- /dev/null +++ b/csharp/src/Apache.Arrow/Memory/SharedNativeAllocationOwner.cs @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections.Generic; + +namespace Apache.Arrow.Memory +{ + internal sealed class SharedNativeAllocationOwner : INativeAllocationOwner + { + // readonly List _pointers = new List(); + + public void Release(IntPtr ptr, int offset, int length) + { + } + } +} diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 1fe1121f45bb8..4d54c03863db4 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -18,6 +18,8 @@ using System.IO; using System.Linq; using System.Runtime.InteropServices; +using System.Threading; +using System.Threading.Tasks; using Apache.Arrow.C; using Apache.Arrow.Ipc; using Apache.Arrow.Types; @@ -46,7 +48,7 @@ public CDataSchemaPythonTest() PythonEngine.Initialize(); - if (System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && !PythonEngine.PythonPath.Contains("dlls", StringComparison.OrdinalIgnoreCase)) { dynamic sys = Py.Import("sys"); @@ -193,7 +195,7 @@ private RecordBatch GetTestRecordBatch() { new Field("col1", Int64Type.Default, true), new Field("col2", StringType.Default, true), - new Field("col3", DoubleType.Default, false), + new Field("col3", DoubleType.Default, true), }; return new RecordBatch( new Schema(fields, null), @@ -224,6 +226,22 @@ private dynamic GetPythonRecordBatch() } } + private IArrowArrayStream GetTestArrayStream() + { + RecordBatch recordBatch = GetTestRecordBatch(); + return new TestArrayStream(recordBatch.Schema, recordBatch); + } + + private dynamic GetPythonRecordBatchReader() + { + dynamic recordBatch = GetPythonRecordBatch(); + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + return pa.RecordBatchReader.from_batches(recordBatch.schema, new PyList(new PyObject[] { recordBatch })); + } + } + // Schemas created in Python, used in CSharp [SkippableFact] public unsafe void ImportType() @@ -455,37 +473,6 @@ public unsafe void ImportRecordBatch() Assert.Equal(5, recordBatch.Length); } - [SkippableFact] - public unsafe void ImportRecordBatchReader() - { - CArrowArray* cArray = CArrowArray.Create(); - CArrowSchema* cSchema = CArrowSchema.Create(); - - using (Py.GIL()) - { - dynamic pa = Py.Import("pyarrow"); - dynamic table = pa.table( - new PyList(new PyObject[] - { - pa.array(new long?[] { 1, 2, 3, null, 5 }), - pa.array(new[] { "hello", "world", null, "foo", "bar" }), - pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }) - }), - new[] { "col1", "col2", "col3" }); - - dynamic batch = table.to_batches()[0]; - - long batchPtr = ((IntPtr)cArray).ToInt64(); - long schemaPtr = ((IntPtr)cSchema).ToInt64(); - batch._export_to_c(batchPtr, schemaPtr); - } - - Schema schema = CArrowSchemaImporter.ImportSchema(cSchema); - RecordBatch recordBatch = CArrowArrayImporter.ImportRecordBatch(cArray, schema); - - Assert.Equal(5, recordBatch.Length); - } - [SkippableFact] public unsafe void ImportArrayStream() { @@ -542,6 +529,7 @@ public unsafe void ExportArray() // Since we allocated, we are responsible for freeing the pointer. CArrowArray.Free(cArray); + CArrowSchema.Free(cSchema); } [SkippableFact] @@ -551,35 +539,103 @@ public unsafe void ExportBatch() dynamic pyBatch = GetPythonRecordBatch(); CArrowArray* cArray = CArrowArray.Create(); + CArrowArrayExporter.ExportRecordBatch(batch, cArray); + CArrowSchema* cSchema = CArrowSchema.Create(); + CArrowSchemaExporter.ExportSchema(batch.Schema, cSchema); + + // For Python, we need to provide the pointers + long arrayPtr = ((IntPtr)cArray).ToInt64(); + long schemaPtr = ((IntPtr)cSchema).ToInt64(); + + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + dynamic exportedPyArray = pa.RecordBatch._import_from_c(arrayPtr, schemaPtr); + Assert.True(exportedPyArray == pyBatch); + } + + // Since we allocated, we are responsible for freeing the pointer. + CArrowArray.Free(cArray); + CArrowSchema.Free(cSchema); + } -#if TODO + [SkippableFact] + public unsafe void ExportBatchReader() + { + RecordBatch batch = GetTestRecordBatch(); + dynamic pyBatch = GetPythonRecordBatch(); + + CArrowArray* cArray = CArrowArray.Create(); CArrowArrayExporter.ExportRecordBatch(batch, cArray); + CArrowSchema* cSchema = CArrowSchema.Create(); + CArrowSchemaExporter.ExportSchema(batch.Schema, cSchema); + + // For Python, we need to provide the pointers + long arrayPtr = ((IntPtr)cArray).ToInt64(); + long schemaPtr = ((IntPtr)cSchema).ToInt64(); + using (Py.GIL()) { dynamic pa = Py.Import("pyarrow"); - dynamic table = pa.table( - new PyList(new PyObject[] - { - pa.array(new long?[] { 1, 2, 3, null, 5 }), - pa.array(new[] { "hello", "world", null, "foo", "bar" }), - pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }) - }), - new[] { "col1", "col2", "col3" }); + dynamic exportedPyArray = pa.RecordBatch._import_from_c(arrayPtr, schemaPtr); + Assert.True(exportedPyArray == pyBatch); + } - dynamic batch = table.to_batches()[0]; + // Since we allocated, we are responsible for freeing the pointer. + CArrowArray.Free(cArray); + CArrowSchema.Free(cSchema); + } - long batchPtr = ((IntPtr)cArray).ToInt64(); - long schemaPtr = ((IntPtr)cSchema).ToInt64(); - batch._export_to_c(batchPtr, schemaPtr); + [SkippableFact] + public unsafe void ExportArrayStream() + { + IArrowArrayStream arrayStream = GetTestArrayStream(); + dynamic pyRecordBatchReader = GetPythonRecordBatchReader(); + + CArrowArrayStream* cArrayStream = CArrowArrayStream.Create(); + CArrowArrayStreamExporter.ExportArrayStream(arrayStream, cArrayStream); + + // For Python, we need to provide the pointers + long arrayStreamPtr = ((IntPtr)cArrayStream).ToInt64(); + + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + dynamic exportedPyReader = pa.RecordBatchReader._import_from_c(arrayStreamPtr); + Assert.True(exportedPyReader.read_all() == pyRecordBatchReader.read_all()); } - Schema schema = CArrowSchemaImporter.ImportSchema(cSchema); - RecordBatch recordBatch = CArrowArrayImporter.ImportRecordBatch(cArray, schema); + // Since we allocated, we are responsible for freeing the pointer. + CArrowArrayStream.Free(cArrayStream); + } - Assert.Equal(5, recordBatch.Length); -#endif + sealed class TestArrayStream : IArrowArrayStream + { + private readonly RecordBatch[] _batches; + private int _index; + + public TestArrayStream(Schema schema, params RecordBatch[] batches) + { + Schema = schema; + _batches = batches; + } + + public Schema Schema { get; } + + public ValueTask ReadNextRecordBatchAsync(CancellationToken cancellationToken = default) + { + if (_index < 0) { throw new ObjectDisposedException(nameof(TestArrayStream)); } + + RecordBatch result = _index < _batches.Length ? _batches[_index++] : null; + return new ValueTask(result); + } + + public void Dispose() + { + _index = -1; + } } } } From a0dc92bb7603fec9c69149f353c0d0c0554820ee Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 8 May 2023 05:45:44 -0700 Subject: [PATCH 03/13] Small changes --- .../Apache.Arrow/Arrays/ArrowArrayFactory.cs | 2 + csharp/src/Apache.Arrow/Arrays/NullArray.cs | 42 +++++++++++++++++++ .../src/Apache.Arrow/C/CArrowArrayImporter.cs | 6 ++- .../Memory/NativeMemoryManager.cs | 6 ++- .../CDataInterfacePythonTests.cs | 3 +- 5 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 csharp/src/Apache.Arrow/Arrays/NullArray.cs diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs index 319dcab17e75c..845cbbd3e56f2 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs @@ -25,6 +25,8 @@ public static IArrowArray BuildArray(ArrayData data) { switch (data.DataType.TypeId) { + case ArrowTypeId.Null: + return new NullArray(data); case ArrowTypeId.Boolean: return new BooleanArray(data); case ArrowTypeId.UInt8: diff --git a/csharp/src/Apache.Arrow/Arrays/NullArray.cs b/csharp/src/Apache.Arrow/Arrays/NullArray.cs new file mode 100644 index 0000000000000..c6957a3d4ea30 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/NullArray.cs @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + public class NullArray : IArrowArray + { + public ArrayData Data { get; } + + public NullArray(ArrayData data) + { + data.EnsureDataType(ArrowTypeId.Null); + data.EnsureBufferCount(0); + } + + public int Length => Data.Length; + + public int Offset => Data.Offset; + + public int NullCount => Data.NullCount; + + public void Dispose() { } + public bool IsNull(int index) => true; + public bool IsValid(int index) => false; + + public void Accept(IArrowArrayVisitor visitor) => throw new System.NotImplementedException(); + } +} diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index 5939219db46e0..f7e26a02f84fd 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -155,7 +155,9 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) throw new NotSupportedException(); case ArrowTypeId.Map: throw new NotSupportedException(); - case ArrowTypeId.Null: // TODO + case ArrowTypeId.Null: + buffers = new ArrowBuffer[0]; + break; default: if (type is FixedWidthType fixedWidthType) { @@ -163,7 +165,7 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) } else { - throw new InvalidOperationException(); + throw new NotSupportedException(); } break; } diff --git a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs index 9a6be27f05159..77fd63d1e5253 100644 --- a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs +++ b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs @@ -88,7 +88,11 @@ bool IShareable.TryShare(SharedNativeAllocationOwner newOwner) INativeAllocationOwner oldOwner = Interlocked.Exchange(ref _owner, newOwner); if (oldOwner == null) { - _owner = null; + // We've already been disposed, so restore the null owner. If it turns + // out that we've raced the disposing thread and we end up with ownership + // of the memory, we'll need to deallocate it as a result of the sharing + // failure. + Interlocked.Exchange(ref _owner, null); return false; } diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 4d54c03863db4..6fb8108747496 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -454,11 +454,12 @@ public unsafe void ImportRecordBatch() dynamic table = pa.table( new PyList(new PyObject[] { + pa.array(new object[] { null, null, null, null, null }), pa.array(new long?[] { 1, 2, 3, null, 5 }), pa.array(new[] { "hello", "world", null, "foo", "bar" }), pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }) }), - new[] { "col1", "col2", "col3" }); + new[] { "col0", "col1", "col2", "col3" }); dynamic batch = table.to_batches()[0]; From 366225bc5ace4d19c3bdd2b2fd23ff4829eff382 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 8 May 2023 16:28:44 +0000 Subject: [PATCH 04/13] Implemented complete (if limited) memory allocation strategy. --- csharp/src/Apache.Arrow/ArrowBuffer.cs | 14 +- .../src/Apache.Arrow/C/CArrowArrayExporter.cs | 59 +++++---- .../src/Apache.Arrow/C/CArrowArrayImporter.cs | 2 +- ...ionOwner.cs => ExportedAllocationOwner.cs} | 30 ++++- .../{IShareable.cs => IOwnableAllocation.cs} | 6 +- .../Memory/ImportedAllocationOwner.cs | 68 ++++++++++ .../Memory/ImportedMemoryOwner.cs | 122 ------------------ .../Memory/NativeMemoryManager.cs | 41 +++--- 8 files changed, 166 insertions(+), 176 deletions(-) rename csharp/src/Apache.Arrow/Memory/{SharedNativeAllocationOwner.cs => ExportedAllocationOwner.cs} (53%) rename csharp/src/Apache.Arrow/Memory/{IShareable.cs => IOwnableAllocation.cs} (86%) create mode 100644 csharp/src/Apache.Arrow/Memory/ImportedAllocationOwner.cs delete mode 100644 csharp/src/Apache.Arrow/Memory/ImportedMemoryOwner.cs diff --git a/csharp/src/Apache.Arrow/ArrowBuffer.cs b/csharp/src/Apache.Arrow/ArrowBuffer.cs index 653c9c2d42634..39fd7a31b9ca9 100644 --- a/csharp/src/Apache.Arrow/ArrowBuffer.cs +++ b/csharp/src/Apache.Arrow/ArrowBuffer.cs @@ -73,13 +73,23 @@ public void Dispose() _memoryOwner?.Dispose(); } - internal bool TryShare(SharedNativeAllocationOwner newOwner) + internal bool TryExport(ExportedAllocationOwner newOwner, out IntPtr ptr) { if (_memoryOwner == null && IsEmpty) { + ptr = IntPtr.Zero; return true; } - return _memoryOwner is IShareable shareableMemory && shareableMemory.TryShare(newOwner); + + if (_memoryOwner is IOwnableAllocation ownable && ownable.TryAcquire(out ptr, out int offset, out int length)) + { + newOwner.Acquire(ptr, offset, length); + ptr += offset; + return true; + } + + ptr = IntPtr.Zero; + return false; } } } diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs index 3d5eedd5e1db0..3f062e36527c7 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs @@ -51,7 +51,18 @@ public static unsafe void ExportArray(IArrowArray array, CArrowArray* cArray) throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArray)); } - ConvertArray(new SharedNativeAllocationOwner(), array.Data, cArray); + ExportedAllocationOwner allocationOwner = new ExportedAllocationOwner(); + try + { + ConvertArray(allocationOwner, array.Data, cArray); + cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(ReleaseArray); + cArray->private_data = FromDisposable(allocationOwner); + allocationOwner = null; + } + finally + { + allocationOwner?.Dispose(); + } } /// @@ -81,16 +92,27 @@ public static unsafe void ExportRecordBatch(RecordBatch batch, CArrowArray* cArr throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArray)); } - ConvertRecordBatch(new SharedNativeAllocationOwner(), batch, cArray); + ExportedAllocationOwner allocationOwner = new ExportedAllocationOwner(); + try + { + ConvertRecordBatch(allocationOwner, batch, cArray); + cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(ReleaseArray); + cArray->private_data = FromDisposable(allocationOwner); + allocationOwner = null; + } + finally + { + allocationOwner?.Dispose(); + } } - private unsafe static void ConvertArray(SharedNativeAllocationOwner sharedOwner, ArrayData array, CArrowArray* cArray) + private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, ArrayData array, CArrowArray* cArray) { cArray->length = array.Length; cArray->offset = array.Offset; cArray->null_count = array.NullCount; cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(ReleaseArray); - cArray->private_data = FromDisposable(array); + cArray->private_data = null; cArray->n_buffers = array.Buffers?.Length ?? 0; cArray->buffers = null; @@ -100,11 +122,12 @@ private unsafe static void ConvertArray(SharedNativeAllocationOwner sharedOwner, for (int i = 0; i < array.Buffers.Length; i++) { ArrowBuffer buffer = array.Buffers[i]; - if (!buffer.TryShare(sharedOwner)) + IntPtr ptr; + if (!buffer.TryExport(sharedOwner, out ptr)) { throw new NotSupportedException(); // TODO } - cArray->buffers[i] = (byte*)buffer.Memory.Pin().Pointer; + cArray->buffers[i] = (byte*)ptr; } } @@ -128,13 +151,13 @@ private unsafe static void ConvertArray(SharedNativeAllocationOwner sharedOwner, } } - private unsafe static void ConvertRecordBatch(SharedNativeAllocationOwner sharedOwner, RecordBatch batch, CArrowArray* cArray) + private unsafe static void ConvertRecordBatch(ExportedAllocationOwner sharedOwner, RecordBatch batch, CArrowArray* cArray) { cArray->length = batch.Length; cArray->offset = 0; cArray->null_count = 0; cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(ReleaseArray); - cArray->private_data = FromDisposable(batch); + cArray->private_data = null; cArray->n_buffers = 1; cArray->buffers = (byte**)Marshal.AllocCoTaskMem(IntPtr.Size); @@ -158,25 +181,11 @@ private unsafe static void ConvertRecordBatch(SharedNativeAllocationOwner shared private unsafe static void ReleaseArray(CArrowArray* cArray) { - if (cArray->n_children != 0) + if (cArray->private_data != null) { - for (int i = 0; i < cArray->n_children; i++) - { - ReleaseArray(cArray->children[i]); - Marshal.FreeCoTaskMem((IntPtr)cArray->children[i]); - } - Marshal.FreeCoTaskMem((IntPtr)cArray->children); - cArray->children = null; - cArray->n_children = 0; + Dispose(&cArray->private_data); } - - if (cArray->buffers != null) - { - Marshal.FreeCoTaskMem((IntPtr)cArray->buffers); - cArray->buffers = null; - } - - Dispose(&cArray->private_data); + cArray->private_data = null; cArray->release = null; } diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index f7e26a02f84fd..67617dcf18253 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -87,7 +87,7 @@ public static unsafe RecordBatch ImportRecordBatch(CArrowArray* ptr, Schema sche } } - private sealed unsafe class ImportedArrowArray : ImportedMemoryOwner + private sealed unsafe class ImportedArrowArray : ImportedAllocationOwner { private readonly CArrowArray* _cArray; diff --git a/csharp/src/Apache.Arrow/Memory/SharedNativeAllocationOwner.cs b/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs similarity index 53% rename from csharp/src/Apache.Arrow/Memory/SharedNativeAllocationOwner.cs rename to csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs index 47e26171624e0..5b72819e3d0a5 100644 --- a/csharp/src/Apache.Arrow/Memory/SharedNativeAllocationOwner.cs +++ b/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs @@ -15,15 +15,41 @@ using System; using System.Collections.Generic; +using System.Runtime.InteropServices; namespace Apache.Arrow.Memory { - internal sealed class SharedNativeAllocationOwner : INativeAllocationOwner + internal sealed class ExportedAllocationOwner : INativeAllocationOwner, IDisposable { - // readonly List _pointers = new List(); + private readonly List _pointers = new List(); + private int _allocationSize; + + ~ExportedAllocationOwner() + { + Dispose(); + } + + public void Acquire(IntPtr ptr, int offset, int length) + { + _pointers.Add(ptr); + _allocationSize += length; + } public void Release(IntPtr ptr, int offset, int length) { + throw new InvalidOperationException(); + } + + public void Dispose() + { + for (int i = 0; i < _pointers.Count; i++) + { + if (_pointers[i] != IntPtr.Zero) + { + Marshal.FreeHGlobal(_pointers[i]); + _pointers[i] = IntPtr.Zero; + } + } } } } diff --git a/csharp/src/Apache.Arrow/Memory/IShareable.cs b/csharp/src/Apache.Arrow/Memory/IOwnableAllocation.cs similarity index 86% rename from csharp/src/Apache.Arrow/Memory/IShareable.cs rename to csharp/src/Apache.Arrow/Memory/IOwnableAllocation.cs index 6a6d5028365fe..a5e7565bf0f40 100644 --- a/csharp/src/Apache.Arrow/Memory/IShareable.cs +++ b/csharp/src/Apache.Arrow/Memory/IOwnableAllocation.cs @@ -13,10 +13,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +using System; + namespace Apache.Arrow.Memory { - internal interface IShareable + internal interface IOwnableAllocation { - bool TryShare(SharedNativeAllocationOwner newOwner); + bool TryAcquire(out IntPtr ptr, out int offset, out int length); } } diff --git a/csharp/src/Apache.Arrow/Memory/ImportedAllocationOwner.cs b/csharp/src/Apache.Arrow/Memory/ImportedAllocationOwner.cs new file mode 100644 index 0000000000000..a3305718bc480 --- /dev/null +++ b/csharp/src/Apache.Arrow/Memory/ImportedAllocationOwner.cs @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Buffers; +using System.Threading; + +namespace Apache.Arrow.Memory +{ + internal abstract class ImportedAllocationOwner : INativeAllocationOwner + { + long _referenceCount; + long _managedMemory; + + protected ImportedAllocationOwner() + { + _referenceCount = 1; + } + + public IMemoryOwner AddMemory(IntPtr ptr, int offset, int length) + { + if (_referenceCount <= 0) + { + throw new ObjectDisposedException(typeof(ImportedAllocationOwner).Name); + } + + NativeMemoryManager memory = new NativeMemoryManager(this, ptr, offset, length); + Interlocked.Increment(ref _referenceCount); + Interlocked.Add(ref _managedMemory, length); + if (length > 0) + { + GC.AddMemoryPressure(length); + } + return memory; + } + + public void Release(IntPtr ptr, int offset, int length) + { + Release(); + } + + public void Release() + { + if (Interlocked.Decrement(ref _referenceCount) == 0) + { + if (_managedMemory > 0) + { + GC.RemoveMemoryPressure(_managedMemory); + } + FinalRelease(); + } + } + + protected abstract void FinalRelease(); + } +} diff --git a/csharp/src/Apache.Arrow/Memory/ImportedMemoryOwner.cs b/csharp/src/Apache.Arrow/Memory/ImportedMemoryOwner.cs deleted file mode 100644 index 55fd1ddfbb2ae..0000000000000 --- a/csharp/src/Apache.Arrow/Memory/ImportedMemoryOwner.cs +++ /dev/null @@ -1,122 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -using System; -using System.Buffers; -using System.Runtime.CompilerServices; -using System.Threading; - -namespace Apache.Arrow.Memory -{ - internal abstract class ImportedMemoryOwner - { - long _referenceCount; - long _managedMemory; - - protected ImportedMemoryOwner() - { - _referenceCount = 1; - } - - public IMemoryOwner AddMemory(IntPtr ptr, int offset, int length) - { - if (_referenceCount <= 0) - { - throw new ObjectDisposedException(typeof(ImportedMemoryManager).Name); - } - - IMemoryOwner memory = new ImportedMemoryManager(this, ptr, offset, length); - Interlocked.Increment(ref _referenceCount); - Interlocked.Add(ref _managedMemory, length); - if (length > 0) - { - GC.AddMemoryPressure(length); - } - return memory; - } - - public void Release() - { - if (Interlocked.Decrement(ref _referenceCount) == 0) - { - if (_managedMemory > 0) - { - GC.RemoveMemoryPressure(_managedMemory); - } - FinalRelease(); - } - } - - protected abstract void FinalRelease(); - - sealed private class ImportedMemoryManager : MemoryManager - { - private ImportedMemoryOwner _owner; - private IntPtr _ptr; - private readonly int _offset; - private readonly int _length; - - public ImportedMemoryManager(ImportedMemoryOwner owner, IntPtr ptr, int offset, int length) - { - _owner = owner; - _ptr = ptr; - _offset = offset; - _length = length; - } - - ~ImportedMemoryManager() - { - Dispose(false); - } - - public override unsafe Span GetSpan() - { - void* ptr = CalculatePointer(0); - return new Span(ptr, _length); - } - - public override unsafe MemoryHandle Pin(int elementIndex = 0) - { - // NOTE: Imported memory doesn't require GC pinning because by definition it's not - // managed by the garbage collector. - - void* ptr = CalculatePointer(elementIndex); - return new MemoryHandle(ptr, default, this); - } - - public override void Unpin() - { - // SEE: Pin implementation - return; - } - - protected override void Dispose(bool disposing) - { - // Only free once. - - ImportedMemoryOwner owner = Interlocked.Exchange(ref _owner, null); - if (owner != null) - { - _ptr = IntPtr.Zero; - owner.Release(); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private unsafe void* CalculatePointer(int index) => - (_ptr + _offset + index).ToPointer(); - } - } -} diff --git a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs index 77fd63d1e5253..e2bce374e9d3d 100644 --- a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs +++ b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs @@ -20,12 +20,12 @@ namespace Apache.Arrow.Memory { - public class NativeMemoryManager : MemoryManager, IShareable + public class NativeMemoryManager : MemoryManager, IOwnableAllocation { private IntPtr _ptr; private readonly int _offset; private readonly int _length; - private INativeAllocationOwner _owner; + private readonly INativeAllocationOwner _owner; public NativeMemoryManager(IntPtr ptr, int offset, int length) { @@ -72,35 +72,32 @@ public override void Unpin() protected override void Dispose(bool disposing) { // Only free once. - INativeAllocationOwner owner = Interlocked.Exchange(ref _owner, null); - if (_owner != null) + IntPtr ptr = Interlocked.Exchange(ref _ptr, IntPtr.Zero); + if (ptr != IntPtr.Zero) { - IntPtr ptr = Interlocked.Exchange(ref _ptr, IntPtr.Zero); - if (ptr != IntPtr.Zero) - { - owner.Release(ptr, _offset, _length); - } + _owner.Release(ptr, _offset, _length); } } - bool IShareable.TryShare(SharedNativeAllocationOwner newOwner) + bool IOwnableAllocation.TryAcquire(out IntPtr ptr, out int offset, out int length) { - INativeAllocationOwner oldOwner = Interlocked.Exchange(ref _owner, newOwner); - if (oldOwner == null) - { - // We've already been disposed, so restore the null owner. If it turns - // out that we've raced the disposing thread and we end up with ownership - // of the memory, we'll need to deallocate it as a result of the sharing - // failure. - Interlocked.Exchange(ref _owner, null); - return false; - } + // TODO: implement refcounted buffers? - if (oldOwner is SharedNativeAllocationOwner sharedOwner) + if (object.ReferenceEquals(_owner, NativeMemoryAllocator.ExclusiveOwner)) { + ptr = Interlocked.Exchange(ref _ptr, IntPtr.Zero); + if (ptr != IntPtr.Zero) + { + offset = _offset; + length = _length; + return true; + } } - return true; + ptr = IntPtr.Zero; + offset = 0; + length = 0; + return false; } [MethodImpl(MethodImplOptions.AggressiveInlining)] From c679e2e157814bf321f3f0ab0ba0b26559ec118f Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 8 May 2023 20:30:37 +0000 Subject: [PATCH 05/13] Cleanup --- .../src/Apache.Arrow/C/CArrowArrayExporter.cs | 8 +- .../src/Apache.Arrow/C/CArrowArrayImporter.cs | 110 +++++++++++------- .../CDataInterfaceDataTests.cs | 2 +- .../CDataInterfacePythonTests.cs | 41 ++++++- 4 files changed, 112 insertions(+), 49 deletions(-) diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs index 3f062e36527c7..2d949018dbd61 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs @@ -25,7 +25,9 @@ public static class CArrowArrayExporter private unsafe delegate void ReleaseArrowArray(CArrowArray* cArray); /// - /// Export an to a . + /// Export an to a . Whether or not the + /// export succeeds, the original array becomes invalid. Clone an array to continue using it + /// after a copy has been exported. /// /// The array to export /// An allocated but uninitialized CArrowArray pointer. @@ -66,7 +68,9 @@ public static unsafe void ExportArray(IArrowArray array, CArrowArray* cArray) } /// - /// Export a to a . + /// Export a to a . Whether or not the + /// export succeeds, the original record batch becomes invalid. Clone the batch to continue using it + /// after a copy has been exported. /// /// The record batch to export /// An allocated but uninitialized CArrowArray pointer. diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index 67617dcf18253..c5fa25d012e0c 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -16,6 +16,7 @@ // under the License. using System; +using System.Collections.Generic; using Apache.Arrow.Memory; using Apache.Arrow.Types; @@ -114,13 +115,11 @@ protected override void FinalRelease() public IArrowArray GetAsArray(IArrowType type) { - // TODO: Cleanup more deterministically when there's an exception return ArrowArrayFactory.BuildArray(GetAsArrayData(_cArray, type)); } public RecordBatch GetAsRecordBatch(Schema schema) { - // TODO: Cleanup more deterministically when there's an exception IArrowArray[] arrays = new IArrowArray[schema.FieldsList.Count]; for (int i = 0; i < _cArray->n_children; i++) { @@ -132,16 +131,8 @@ public RecordBatch GetAsRecordBatch(Schema schema) private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) { ArrayData[] children = null; - if (cArray->n_children > 0) - { - children = new ArrayData[cArray->n_children]; - for (int i = 0; i < cArray->n_children; i++) - { - children[i] = GetAsArrayData(cArray->children[i], type); - } - } - ArrowBuffer[] buffers = null; + ArrayData dictionary = null; switch (type.TypeId) { case ArrowTypeId.String: @@ -149,19 +140,28 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) buffers = ImportByteArrayBuffers(cArray); break; case ArrowTypeId.List: + children = ProcessListChildren(cArray, ((ListType)type).ValueDataType); + buffers = ImportListBuffers(cArray); + break; case ArrowTypeId.Struct: + children = ProcessStructChildren(cArray, ((StructType)type).Fields); + buffers = new ArrowBuffer[] { ImportValidityBuffer(cArray) }; + break; case ArrowTypeId.Union: - case ArrowTypeId.Dictionary: - throw new NotSupportedException(); case ArrowTypeId.Map: + // TODO: throw new NotSupportedException(); case ArrowTypeId.Null: buffers = new ArrowBuffer[0]; break; + case ArrowTypeId.Dictionary: + DictionaryType dictionaryType = (DictionaryType)type; + dictionary = GetAsArrayData(cArray->dictionary, dictionaryType.ValueType); + goto default; // Fall through to get the validity and index data default: if (type is FixedWidthType fixedWidthType) { - buffers = ImportBuffers(cArray, fixedWidthType.BitWidth); + buffers = ImportFixedWidthBuffers(cArray, fixedWidthType.BitWidth); } else { @@ -170,12 +170,6 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) break; } - ArrayData dictionary = null; - if (cArray->dictionary != null) - { - dictionary = GetAsArrayData(cArray->dictionary, type); - } - return new ArrayData( type, checked((int)cArray->length), @@ -186,40 +180,82 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) dictionary); } + private ArrayData[] ProcessListChildren(CArrowArray* cArray, IArrowType type) + { + if (cArray->n_children != 1) + { + throw new InvalidOperationException("Lists are expected to have exactly one child array"); + } + + ArrayData[] children = new ArrayData[1]; + children[0] = GetAsArrayData(cArray->children[0], type); + return children; + } + + private ArrayData[] ProcessStructChildren(CArrowArray* cArray, IReadOnlyList fields) + { + if (cArray->n_children != fields.Count) + { + throw new InvalidOperationException("Struct child count does not match schema"); + } + + ArrayData[] children = new ArrayData[fields.Count]; + for (int i = 0; i < fields.Count; i++) + { + children[i] = GetAsArrayData(cArray->children[i], fields[i].DataType); + } + return children; + } + + private ArrowBuffer ImportValidityBuffer(CArrowArray* cArray) + { + int length = checked((int)cArray->length); + int validityLength = checked((int)BitUtility.RoundUpToMultipleOf8(length) / 8); + return (cArray->buffers[0] == null) ? ArrowBuffer.Empty : new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[0], 0, validityLength)); + } + private ArrowBuffer[] ImportByteArrayBuffers(CArrowArray* cArray) { if (cArray->n_buffers != 3) { - return null; + throw new InvalidOperationException("Byte arrays are expected to have exactly three child arrays"); } - // validity, offsets, data int length = checked((int)cArray->length); int offsetsLength = (length + 1) * 4; int* offsets = (int*)cArray->buffers[1]; int valuesLength = offsets[length]; - int validityLength = checked((int)BitUtility.RoundUpToMultipleOf8(length) / 8); ArrowBuffer[] buffers = new ArrowBuffer[3]; + buffers[0] = ImportValidityBuffer(cArray); buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength)); buffers[2] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[2], 0, valuesLength)); - if (cArray->buffers[0] != null) - { - buffers[0] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[0], 0, validityLength)); - } - else + + return buffers; + } + + private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray) + { + if (cArray->n_buffers != 2) { - buffers[0] = ArrowBuffer.Empty; + throw new InvalidOperationException("List arrays are expected to have exactly two children"); } + int length = checked((int)cArray->length); + int offsetsLength = (length + 1) * 4; + + ArrowBuffer[] buffers = new ArrowBuffer[2]; + buffers[0] = ImportValidityBuffer(cArray); + buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength)); + return buffers; } - private ArrowBuffer[] ImportBuffers(CArrowArray* cArray, int bitWidth) + private ArrowBuffer[] ImportFixedWidthBuffers(CArrowArray* cArray, int bitWidth) { if (cArray->n_buffers != 2) { - return null; + throw new InvalidOperationException("Arrays of fixed-width type are expected to have exactly two children"); } // validity, data @@ -230,18 +266,8 @@ private ArrowBuffer[] ImportBuffers(CArrowArray* cArray, int bitWidth) else valuesLength = checked((int)BitUtility.RoundUpToMultipleOf8(length) / 8); - int validityLength = checked((int)BitUtility.RoundUpToMultipleOf8(length) / 8); - ArrowBuffer[] buffers = new ArrowBuffer[2]; - if (cArray->buffers[0] != null) - { - buffers[0] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, validityLength)); - } - else - { - buffers[0] = ArrowBuffer.Empty; - } - + buffers[0] = ImportValidityBuffer(cArray); buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, valuesLength)); return buffers; diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs index 3a8a20bc8375b..98cd5a9c5e973 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs @@ -81,7 +81,7 @@ public unsafe void CallsReleaseForInvalid() cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate( releaseCallback); - Assert.Throws(() => + Assert.Throws(() => { CArrowArrayImporter.ImportArray(cArray, GetTestArray().Data.DataType); }); diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 6fb8108747496..9ed2d3618372a 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -455,11 +455,24 @@ public unsafe void ImportRecordBatch() new PyList(new PyObject[] { pa.array(new object[] { null, null, null, null, null }), - pa.array(new long?[] { 1, 2, 3, null, 5 }), - pa.array(new[] { "hello", "world", null, "foo", "bar" }), - pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }) + pa.array(List(1, 2, 3, null, 5)), + pa.array(List("hello", "world", null, "foo", "bar")), + pa.array(List(0.0, 1.4, 2.5, 3.6, 4.7)), + pa.array(new PyObject[] { List(1, 2), List(3, 4), PyObject.None, PyObject.None, List(5, 4, 3) }), + pa.StructArray.from_arrays( + new PyList(new PyObject[] + { + List(10, 9, null, null, null), + List("banana", "apple", "orange", "cherry", "grape"), + List(null, 4.3, -9, 123.456, 0), + }), + new[] { "fld1", "fld2", "fld3" }), + pa.DictionaryArray.from_arrays( + pa.array(List(1, 0, 1, 1, null)), + pa.array(List("foo", "bar")) + ), }), - new[] { "col0", "col1", "col2", "col3" }); + new[] { "col1", "col1", "col2", "col3", "col4", "col5", "col6" }); dynamic batch = table.to_batches()[0]; @@ -612,6 +625,26 @@ public unsafe void ExportArrayStream() CArrowArrayStream.Free(cArrayStream); } + private static PyObject List(params int?[] values) + { + return new PyList(values.Select(i => i == null ? PyObject.None : new PyInt(i.Value)).ToArray()); + } + + private static PyObject List(params long?[] values) + { + return new PyList(values.Select(i => i == null ? PyObject.None : new PyInt(i.Value)).ToArray()); + } + + private static PyObject List(params double?[] values) + { + return new PyList(values.Select(i => i == null ? PyObject.None : new PyFloat(i.Value)).ToArray()); + } + + private static PyObject List(params string[] values) + { + return new PyList(values.Select(i => i == null ? PyObject.None : new PyString(i)).ToArray()); + } + sealed class TestArrayStream : IArrowArrayStream { private readonly RecordBatch[] _batches; From 67c5044666c26c56c5b44e141a2cb4410f079474 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Tue, 9 May 2023 21:08:09 -0700 Subject: [PATCH 06/13] Improvements as per pull request --- .../src/Apache.Arrow/C/CArrowArrayExporter.cs | 10 +- .../src/Apache.Arrow/C/CArrowArrayImporter.cs | 12 +-- .../src/Apache.Arrow/C/CArrowArrayStream.cs | 29 ++++++ .../C/CArrowArrayStreamExporter.cs | 99 ++++++++++++++----- .../Memory/ExportedAllocationOwner.cs | 8 +- dev/archery/archery/integration/datagen.py | 2 - docs/source/status.rst | 2 +- 7 files changed, 125 insertions(+), 37 deletions(-) diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs index 2d949018dbd61..c38589c45f7b5 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs @@ -122,14 +122,14 @@ private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, Arr cArray->buffers = null; if (cArray->n_buffers > 0) { - cArray->buffers = (byte**)Marshal.AllocCoTaskMem(array.Buffers.Length * IntPtr.Size); + cArray->buffers = (byte**)sharedOwner.Allocate(array.Buffers.Length * IntPtr.Size); for (int i = 0; i < array.Buffers.Length; i++) { ArrowBuffer buffer = array.Buffers[i]; IntPtr ptr; if (!buffer.TryExport(sharedOwner, out ptr)) { - throw new NotSupportedException(); // TODO + throw new NotSupportedException($"An ArrowArray of type {array.DataType.TypeId} could not be exported"); } cArray->buffers[i] = (byte*)ptr; } @@ -139,7 +139,7 @@ private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, Arr cArray->children = null; if (cArray->n_children > 0) { - cArray->children = (CArrowArray**)Marshal.AllocCoTaskMem(IntPtr.Size * array.Children.Length); + cArray->children = (CArrowArray**)sharedOwner.Allocate(IntPtr.Size * array.Children.Length); for (int i = 0; i < array.Children.Length; i++) { cArray->children[i] = CArrowArray.Create(); @@ -164,13 +164,13 @@ private unsafe static void ConvertRecordBatch(ExportedAllocationOwner sharedOwne cArray->private_data = null; cArray->n_buffers = 1; - cArray->buffers = (byte**)Marshal.AllocCoTaskMem(IntPtr.Size); + cArray->buffers = (byte**)sharedOwner.Allocate(IntPtr.Size); cArray->n_children = batch.ColumnCount; cArray->children = null; if (cArray->n_children > 0) { - cArray->children = (CArrowArray**)Marshal.AllocCoTaskMem(IntPtr.Size * batch.ColumnCount); + cArray->children = (CArrowArray**)sharedOwner.Allocate(IntPtr.Size * batch.ColumnCount); int i = 0; foreach (IArrowArray child in batch.Arrays) { diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index c5fa25d012e0c..db5181b56e859 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -149,8 +149,7 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) break; case ArrowTypeId.Union: case ArrowTypeId.Map: - // TODO: - throw new NotSupportedException(); + break; case ArrowTypeId.Null: buffers = new ArrowBuffer[0]; break; @@ -163,13 +162,14 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) { buffers = ImportFixedWidthBuffers(cArray, fixedWidthType.BitWidth); } - else - { - throw new NotSupportedException(); - } break; } + if (buffers == null) + { + throw new NotSupportedException("Data type is not yet supported in import."); + } + return new ArrayData( type, checked((int)cArray->length), diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs index 1430ba6cb51b7..bf1fcb39ef386 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayStream.cs @@ -29,10 +29,39 @@ namespace Apache.Arrow.C [StructLayout(LayoutKind.Sequential)] public unsafe struct CArrowArrayStream { + /// + /// Callback to get the stream type. Will be the same for all arrays in the stream. + /// If successful, the ArrowSchema must be released independently from the stream. + /// + /// Return value: 0 if successful, an `errno`-compatible error code otherwise. + /// public delegate* unmanaged[Stdcall] get_schema; + + /// + /// Callback to get the next array. If no error and the array is released, the stream has ended. + /// If successful, the ArrowArray must be released independently from the stream. + /// + /// Return value: 0 if successful, an `errno`-compatible error code otherwise. + /// public delegate* unmanaged[Stdcall] get_next; + + /// + /// Callback to get optional detailed error information. This must only + /// be called if the last stream operation failed with a non-0 return code. + /// The returned pointer is only valid until the next operation on this stream + /// (including release). + /// + /// Return value: pointer to a null-terminated character array describing the last + /// error, or NULL if no description is available. + /// public delegate* unmanaged[Stdcall] get_last_error; + + /// + /// Release callback: release the stream's own resources. Note that arrays returned by + /// get_next must be individually released. + /// public delegate* unmanaged[Stdcall] release; + public void* private_data; /// diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs index a538525ff527f..a7ec40283a88a 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs @@ -16,7 +16,6 @@ using System; using System.Runtime.InteropServices; -using Apache.Arrow.Memory; using Apache.Arrow.Ipc; namespace Apache.Arrow.C @@ -55,7 +54,7 @@ public static unsafe void ExportArrayStream(IArrowArrayStream arrayStream, CArro throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArrayStream)); } - cArrayStream->private_data = FromDisposable(arrayStream); + cArrayStream->private_data = ExportedArrayStream.Export(arrayStream); cArrayStream->get_schema = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(GetSchema); cArrayStream->get_next = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(GetNext); cArrayStream->get_last_error = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(GetLastError); @@ -64,57 +63,113 @@ public static unsafe void ExportArrayStream(IArrowArrayStream arrayStream, CArro private unsafe static int GetSchema(CArrowArrayStream* cArrayStream, CArrowSchema* cSchema) { + ExportedArrayStream arrayStream = null; try { - IArrowArrayStream arrayStream = FromPointer(cArrayStream->private_data); - CArrowSchemaExporter.ExportSchema(arrayStream.Schema, cSchema); - return 0; + arrayStream = ExportedArrayStream.FromPointer(cArrayStream->private_data); + CArrowSchemaExporter.ExportSchema(arrayStream.ArrowArrayStream.Schema, cSchema); + return arrayStream.ClearError(); } - catch (Exception) + catch (Exception ex) { - return 1; + return arrayStream?.SetError(ex) ?? ExportedArrayStream.EOTHER; } } private unsafe static int GetNext(CArrowArrayStream* cArrayStream, CArrowArray* cArray) { + ExportedArrayStream arrayStream = null; try { cArray->release = null; - IArrowArrayStream arrayStream = FromPointer(cArrayStream->private_data); - RecordBatch recordBatch = arrayStream.ReadNextRecordBatchAsync().Result; + arrayStream = ExportedArrayStream.FromPointer(cArrayStream->private_data); + RecordBatch recordBatch = arrayStream.ArrowArrayStream.ReadNextRecordBatchAsync().Result; if (recordBatch != null) { CArrowArrayExporter.ExportRecordBatch(recordBatch, cArray); } - return 0; + return arrayStream.ClearError(); } - catch (Exception) + catch (Exception ex) { - return 1; + return arrayStream?.SetError(ex) ?? ExportedArrayStream.EOTHER; } } private unsafe static byte* GetLastError(CArrowArrayStream* cArrayStream) { - return null; + try + { + ExportedArrayStream arrayStream = ExportedArrayStream.FromPointer(cArrayStream->private_data); + return arrayStream.LastError; + } + catch (Exception) + { + return null; + } } private unsafe static void Release(CArrowArrayStream* cArrayStream) { - FromPointer(cArrayStream->private_data).Dispose(); + ExportedArrayStream arrayStream = ExportedArrayStream.FromPointer(cArrayStream->private_data); + arrayStream.Dispose(); + cArrayStream->release = null; } - private unsafe static void* FromDisposable(IDisposable disposable) + sealed unsafe class ExportedArrayStream : IDisposable { - GCHandle gch = GCHandle.Alloc(disposable); - return (void*)GCHandle.ToIntPtr(gch); - } + public const int EOTHER = 131; - private unsafe static IArrowArrayStream FromPointer(void* ptr) - { - GCHandle gch = GCHandle.FromIntPtr((IntPtr)ptr); - return (IArrowArrayStream)gch.Target; + ExportedArrayStream(IArrowArrayStream arrayStream) + { + ArrowArrayStream = arrayStream; + LastError = null; + } + + public IArrowArrayStream ArrowArrayStream { get; private set; } + public byte* LastError { get; private set; } + + public static void* Export(IArrowArrayStream arrayStream) + { + ExportedArrayStream result = new ExportedArrayStream(arrayStream); + GCHandle gch = GCHandle.Alloc(result); + return (void*)GCHandle.ToIntPtr(gch); + } + + public static ExportedArrayStream FromPointer(void* ptr) + { + GCHandle gch = GCHandle.FromIntPtr((IntPtr)ptr); + return (ExportedArrayStream)gch.Target; + } + + public int SetError(Exception ex) + { + ReleaseLastError(); + LastError = StringUtil.ToCStringUtf8(ex.Message); + return EOTHER; + } + + public int ClearError() + { + ReleaseLastError(); + return 0; + } + + public void Dispose() + { + ReleaseLastError(); + ArrowArrayStream?.Dispose(); + ArrowArrayStream = null; + } + + void ReleaseLastError() + { + if (LastError != null) + { + Marshal.FreeCoTaskMem((IntPtr)LastError); + LastError = null; + } + } } } } diff --git a/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs b/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs index 5b72819e3d0a5..0936bc17bbd4b 100644 --- a/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs +++ b/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs @@ -29,10 +29,16 @@ internal sealed class ExportedAllocationOwner : INativeAllocationOwner, IDisposa Dispose(); } - public void Acquire(IntPtr ptr, int offset, int length) + public IntPtr Allocate(int size) + { + return Acquire(Marshal.AllocHGlobal(size), 0, size); + } + + public IntPtr Acquire(IntPtr ptr, int offset, int length) { _pointers.Add(ptr); _allocationSize += length; + return ptr; } public void Release(IntPtr ptr, int offset, int length) diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 87586674ffc5b..87f375666b4e0 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1670,11 +1670,9 @@ def _temp_path(): .skip_category('JS'), generate_null_case([10, 0]) - .skip_category('C#') .skip_category('JS'), # TODO(ARROW-7900) generate_null_trivial_case([0, 0]) - .skip_category('C#') .skip_category('JS'), # TODO(ARROW-7900) generate_decimal128_case(), diff --git a/docs/source/status.rst b/docs/source/status.rst index 3ca82ac3e3bb1..930ce07169ad4 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -32,7 +32,7 @@ Data Types | Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | | (primitive) | | | | | | | | +===================+=======+=======+=======+============+=======+=======+=======+ -| Null | ✓ | ✓ | ✓ | | | ✓ | ✓ | +| Null | ✓ | ✓ | ✓ | | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ | Boolean | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ From f740b36cc945701285f405a683cbc0762b5dad0d Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Wed, 10 May 2023 14:37:38 +0000 Subject: [PATCH 07/13] Added NullArray support for real, this time with tests. --- csharp/src/Apache.Arrow/Arrays/NullArray.cs | 79 ++++++++- .../Ipc/ArrowReaderImplementation.cs | 17 +- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 7 +- .../Ipc/ArrowTypeFlatbufferBuilder.cs | 8 +- .../src/Apache.Arrow/Ipc/MessageSerializer.cs | 6 +- .../IntegrationCommand.cs | 10 +- .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 11 +- .../ArrowStreamWriterTests.cs | 1 + .../test/Apache.Arrow.Tests/NullArrayTests.cs | 162 ++++++++++++++++++ csharp/test/Apache.Arrow.Tests/TestData.cs | 8 +- 10 files changed, 295 insertions(+), 14 deletions(-) create mode 100644 csharp/test/Apache.Arrow.Tests/NullArrayTests.cs diff --git a/csharp/src/Apache.Arrow/Arrays/NullArray.cs b/csharp/src/Apache.Arrow/Arrays/NullArray.cs index c6957a3d4ea30..28c90293475a7 100644 --- a/csharp/src/Apache.Arrow/Arrays/NullArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/NullArray.cs @@ -13,18 +13,85 @@ // See the License for the specific language governing permissions and // limitations under the License. +using System; using Apache.Arrow.Types; +using Apache.Arrow.Memory; namespace Apache.Arrow { public class NullArray : IArrowArray { + public class Builder : IArrowArrayBuilder + { + private int _length; + + public int Length => _length; + public int Capacity => _length; + public int NullCount => _length; + + public Builder() + { + } + + public Builder AppendNull() + { + _length++; + return this; + } + + public NullArray Build(MemoryAllocator allocator = default) + { + return new NullArray(_length); + } + + public Builder Clear() + { + _length = 0; + return this; + } + + public Builder Reserve(int capacity) + { + if (capacity < 0) + { + throw new ArgumentOutOfRangeException(nameof(capacity)); + } + + return this; + } + + public Builder Resize(int length) + { + if (length < 0) + { + throw new ArgumentOutOfRangeException(nameof(length)); + } + + _length = length; + return this; + } + + private void CheckIndex(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + } + } + public ArrayData Data { get; } public NullArray(ArrayData data) { data.EnsureDataType(ArrowTypeId.Null); data.EnsureBufferCount(0); + Data = data; + } + + public NullArray(int length) + : this(new ArrayData(NullType.Default, length, length, buffers: System.Array.Empty())) + { } public int Length => Data.Length; @@ -37,6 +104,16 @@ public void Dispose() { } public bool IsNull(int index) => true; public bool IsValid(int index) => false; - public void Accept(IArrowArrayVisitor visitor) => throw new System.NotImplementedException(); + public void Accept(IArrowArrayVisitor visitor) + { + if (visitor is IArrowArrayVisitor nullVisitor) + { + nullVisitor.Visit(this); + } + else + { + visitor.Visit(this); + } + } } } diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index a1c1430124013..ed93000736e0b 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -232,12 +232,6 @@ private ArrayData LoadPrimitiveField( IBufferCreator bufferCreator) { - ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator); - if (!recordBatchEnumerator.MoveNextBuffer()) - { - throw new Exception("Unable to move to the next buffer."); - } - int fieldLength = (int)fieldNode.Length; int fieldNullCount = (int)fieldNode.NullCount; @@ -251,6 +245,17 @@ private ArrayData LoadPrimitiveField( throw new InvalidDataException("Null count length must be >= 0"); // TODO:Localize exception message } + if (field.DataType.TypeId == ArrowTypeId.Null) + { + return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, System.Array.Empty()); + } + + ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator); + if (!recordBatchEnumerator.MoveNextBuffer()) + { + throw new Exception("Unable to move to the next buffer."); + } + ArrowBuffer[] arrowBuff; if (field.DataType.TypeId == ArrowTypeId.Struct) { diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index d9cafc06bffba..92e5965c8ab88 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -56,7 +56,8 @@ internal class ArrowRecordBatchFlatBufferBuilder : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, - IArrowArrayVisitor + IArrowArrayVisitor, + IArrowArrayVisitor { public readonly struct Buffer { @@ -156,6 +157,10 @@ public void Visit(DictionaryArray array) _buffers.Add(CreateBuffer(array.IndicesBuffer)); } + public void Visit(NullArray array) + { + } + private void CreateBuffers(BooleanArray array) { _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index ee119ae5d7f20..016e379e5bbf0 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -65,7 +65,8 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private FlatBufferBuilder Builder { get; } @@ -218,6 +219,11 @@ public void Visit(FixedSizeBinaryType type) Flatbuf.FixedSizeBinary.CreateFixedSizeBinary(Builder, type.ByteWidth)); } + public void Visit(NullType type) + { + Result = new FieldType(Flatbuf.Type.Null, 0); + } + public void Visit(IArrowType type) { throw new NotImplementedException(); diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index a09fff61b4ac8..7426ba4e86959 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -118,6 +118,8 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c { switch (field.TypeType) { + case Flatbuf.Type.Null: + return Types.NullType.Default; case Flatbuf.Type.Int: Flatbuf.Int intMetaData = field.Type().Value; return MessageSerializer.GetNumberType(intMetaData.BitWidth, intMetaData.IsSigned); @@ -135,7 +137,7 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c throw new InvalidDataException("Unsupported floating point precision"); } case Flatbuf.Type.Bool: - return new Types.BooleanType(); + return Types.BooleanType.Default; case Flatbuf.Type.Decimal: Flatbuf.Decimal decMeta = field.Type().Value; switch (decMeta.BitWidth) @@ -178,7 +180,7 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c Flatbuf.Interval intervalMetadata = field.Type().Value; return new Types.IntervalType(intervalMetadata.Unit.ToArrow()); case Flatbuf.Type.Utf8: - return new Types.StringType(); + return Types.StringType.Default; case Flatbuf.Type.FixedSizeBinary: Flatbuf.FixedSizeBinary fixedSizeBinaryMetadata = field.Type().Value; return new Types.FixedSizeBinaryType(fixedSizeBinaryMetadata.ByteWidth); diff --git a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs index b0595747b62d1..9bb566220c5d2 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs @@ -273,7 +273,8 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private JsonFieldData JsonFieldData { get; } public IArrowArray Array { get; private set; } @@ -325,6 +326,13 @@ public void Visit(Decimal256Type type) Array = new Decimal256Array(GetDecimalArrayData(type)); } + public void Visit(NullType type) + { + var json = JsonFieldData.Data.GetRawText(); + int?[] values = JsonSerializer.Deserialize(json, s_options); + Array = new NullArray(values.Length); + } + private ArrayData GetDecimalArrayData(FixedSizeBinaryType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 8fde77d930779..13197bbe1669e 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -90,7 +90,8 @@ private class ArrayComparer : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, - IArrowArrayVisitor + IArrowArrayVisitor, + IArrowArrayVisitor { private readonly IArrowArray _expectedArray; private readonly ArrayTypeComparer _arrayTypeComparer; @@ -154,6 +155,14 @@ public void Visit(DictionaryArray array) array.Dictionary.Accept(dictionaryComparer); } + public void Visit(NullArray array) + { + Assert.IsAssignableFrom(_expectedArray); + Assert.Equal(_expectedArray.Length, array.Length); + Assert.Equal(_expectedArray.NullCount, array.NullCount); + Assert.Equal(_expectedArray.Offset, array.Offset); + } + public void Visit(IArrowArray array) => throw new NotImplementedException(); private void CompareBinaryArrays(BinaryArray actualArray) diff --git a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs index 4932217b19955..89595f99dc0e4 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs @@ -169,6 +169,7 @@ public void WriteBatchWithNulls() length: 10, nullCount: 3, offset: 0)) + .Append("NullColumn", true, new NullArray(10)) .Build(); TestRoundTripRecordBatch(originalBatch); diff --git a/csharp/test/Apache.Arrow.Tests/NullArrayTests.cs b/csharp/test/Apache.Arrow.Tests/NullArrayTests.cs new file mode 100644 index 0000000000000..22c98dc0c59ba --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/NullArrayTests.cs @@ -0,0 +1,162 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Linq; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class NullArrayTests + { + public class Builder + { + public class Append + { + [Theory] + [InlineData(1)] + [InlineData(3)] + public void IncrementsLength(int count) + { + var builder = new NullArray.Builder(); + + for (var i = 0; i < count; i++) + { + builder.AppendNull(); + } + + var array = builder.Build(); + + Assert.Equal(count, array.Length); + } + } + + public class Clear + { + [Fact] + public void ClearsArray() + { + var array = new NullArray.Builder() + .Resize(8) + .AppendNull() + .AppendNull() + .Clear() + .Build(); + + Assert.Equal(0, array.Length); + } + } + + public class Swap + { + [Fact] + public void SwapsExpectedBits() + { + var array = new BooleanArray.Builder() + .AppendRange(Enumerable.Repeat(false, 8)) + .Set(0, true) + .Swap(0, 7) + .Build(); + + Assert.True(array.GetValue(0).HasValue); + Assert.False(array.GetValue(0).Value); + Assert.True(array.GetValue(7).HasValue); + Assert.True(array.GetValue(7).Value); + #pragma warning disable CS0618 + Assert.False(array.GetBoolean(0)); + Assert.True(array.GetBoolean(7)); + #pragma warning restore CS0618 + } + + [Fact] + public void ThrowsWhenIndexOutOfRange() + { + Assert.Throws(() => + { + var builder = new BooleanArray.Builder(); + builder.Swap(0, 1); + }); + } + } + + public class Set + { + [Theory] + [InlineData(8, 0)] + [InlineData(8, 4)] + [InlineData(8, 7)] + [InlineData(16, 8)] + [InlineData(16, 15)] + public void SetsExpectedBitToTrue(int length, int index) + { + var array = new BooleanArray.Builder() + .Resize(length) + .Set(index, true) + .Build(); + + Assert.True(array.GetValue(index).Value); + } + + [Theory] + [InlineData(8, 0)] + [InlineData(8, 4)] + [InlineData(8, 7)] + [InlineData(16, 8)] + [InlineData(16, 15)] + public void SetsExpectedBitsToFalse(int length, int index) + { + var array = new BooleanArray.Builder() + .Resize(length) + .Set(index, false) + .Build(); + + Assert.False(array.GetValue(index).Value); + } + + [Theory] + [InlineData(4)] + public void UnsetBitsAreUnchanged(int index) + { + var array = new BooleanArray.Builder() + .AppendRange(Enumerable.Repeat(false, 8)) + .Set(index, true) + .Build(); + + for (var i = 0; i < 8; i++) + { + if (i != index) + { + Assert.True(array.GetValue(i).HasValue); + Assert.False(array.GetValue(i).Value); + #pragma warning disable CS0618 + Assert.False(array.GetBoolean(i)); + #pragma warning restore CS0618 + } + } + } + + [Fact] + public void ThrowsWhenIndexOutOfRange() + { + Assert.Throws(() => + { + var builder = new BooleanArray.Builder(); + builder.Set(builder.Length, false); + }); + } + } + } + } +} diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index 02186378ad919..96c6fafee270c 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -126,7 +126,8 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private int Length { get; } public IArrowArray Array { get; private set; } @@ -317,6 +318,11 @@ public void Visit(FixedSizeBinaryType type) Array = new FixedSizeBinaryArray(arrayData); } + public void Visit(NullType type) + { + Array = new NullArray(Length); + } + private void GenerateArray(IArrowArrayBuilder builder, Func generator) where TArrayBuilder : IArrowArrayBuilder where TArray : IArrowArray From 2bcb347ec0c0604ccd8b37ec3942c841293c849e Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Wed, 10 May 2023 20:46:55 +0000 Subject: [PATCH 08/13] More fixes and better tests --- csharp/src/Apache.Arrow/Arrays/ArrayData.cs | 14 +++++++ csharp/src/Apache.Arrow/Arrays/NullArray.cs | 12 +----- csharp/src/Apache.Arrow/ArrowBuffer.cs | 2 +- .../src/Apache.Arrow/C/CArrowArrayExporter.cs | 9 +++-- .../C/CArrowArrayStreamExporter.cs | 12 ++++-- .../Apache.Arrow/C/CArrowSchemaExporter.cs | 7 ++-- .../Apache.Arrow/C/CArrowSchemaImporter.cs | 4 +- csharp/src/Apache.Arrow/C/NativeDelegate.cs | 36 +++++++++++++++++ .../Memory/ExportedAllocationOwner.cs | 1 + .../Memory/NativeMemoryManager.cs | 5 +-- csharp/src/Apache.Arrow/RecordBatch.cs | 7 ++++ .../IntegrationCommand.cs | 1 + .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 11 ++++- .../CDataInterfaceDataTests.cs | 5 ++- .../CDataInterfacePythonTests.cs | 40 +++++++++++++++++++ .../CDataInterfaceSchemaTests.cs | 2 + csharp/test/Apache.Arrow.Tests/TestData.cs | 10 +++-- 17 files changed, 142 insertions(+), 36 deletions(-) create mode 100644 csharp/src/Apache.Arrow/C/NativeDelegate.cs diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayData.cs b/csharp/src/Apache.Arrow/Arrays/ArrayData.cs index fb5aa1b5f23fa..de71a33239805 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayData.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayData.cs @@ -13,7 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +using Apache.Arrow.Memory; using Apache.Arrow.Types; +using FlatBuffers; using System; using System.Collections.Generic; using System.Linq; @@ -111,5 +113,17 @@ public ArrayData Slice(int offset, int length) return new ArrayData(DataType, length, RecalculateNullCount, offset, Buffers, Children, Dictionary); } + + public ArrayData Clone(MemoryAllocator allocator = default) + { + return new ArrayData( + DataType, + Length, + NullCount, + Offset, + Buffers?.Select(b => b.Clone(allocator))?.ToArray(), + Children?.Select(b => b.Clone(allocator))?.ToArray(), + Dictionary?.Clone(allocator)); + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/NullArray.cs b/csharp/src/Apache.Arrow/Arrays/NullArray.cs index 28c90293475a7..49c197485bc5b 100644 --- a/csharp/src/Apache.Arrow/Arrays/NullArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/NullArray.cs @@ -104,16 +104,6 @@ public void Dispose() { } public bool IsNull(int index) => true; public bool IsValid(int index) => false; - public void Accept(IArrowArrayVisitor visitor) - { - if (visitor is IArrowArrayVisitor nullVisitor) - { - nullVisitor.Visit(this); - } - else - { - visitor.Visit(this); - } - } + public void Accept(IArrowArrayVisitor visitor) => Array.Accept(this, visitor); } } diff --git a/csharp/src/Apache.Arrow/ArrowBuffer.cs b/csharp/src/Apache.Arrow/ArrowBuffer.cs index 39fd7a31b9ca9..dbd97fc3aec9e 100644 --- a/csharp/src/Apache.Arrow/ArrowBuffer.cs +++ b/csharp/src/Apache.Arrow/ArrowBuffer.cs @@ -58,7 +58,7 @@ public ReadOnlySpan Span public ArrowBuffer Clone(MemoryAllocator allocator = default) { - return new Builder(Span.Length) + return Span.Length == 0 ? Empty : new Builder(Span.Length) .Append(Span) .Build(allocator); } diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs index c38589c45f7b5..4e11291cde4a8 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs @@ -23,6 +23,7 @@ namespace Apache.Arrow.C public static class CArrowArrayExporter { private unsafe delegate void ReleaseArrowArray(CArrowArray* cArray); + private static unsafe readonly NativeDelegate s_releaseArray = new NativeDelegate(ReleaseArray); /// /// Export an to a . Whether or not the @@ -57,7 +58,7 @@ public static unsafe void ExportArray(IArrowArray array, CArrowArray* cArray) try { ConvertArray(allocationOwner, array.Data, cArray); - cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(ReleaseArray); + cArray->release = (delegate* unmanaged[Stdcall])(IntPtr)s_releaseArray.Pointer; cArray->private_data = FromDisposable(allocationOwner); allocationOwner = null; } @@ -100,7 +101,7 @@ public static unsafe void ExportRecordBatch(RecordBatch batch, CArrowArray* cArr try { ConvertRecordBatch(allocationOwner, batch, cArray); - cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(ReleaseArray); + cArray->release = (delegate* unmanaged[Stdcall])s_releaseArray.Pointer; cArray->private_data = FromDisposable(allocationOwner); allocationOwner = null; } @@ -115,7 +116,7 @@ private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, Arr cArray->length = array.Length; cArray->offset = array.Offset; cArray->null_count = array.NullCount; - cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(ReleaseArray); + cArray->release = (delegate* unmanaged[Stdcall])s_releaseArray.Pointer; cArray->private_data = null; cArray->n_buffers = array.Buffers?.Length ?? 0; @@ -160,7 +161,7 @@ private unsafe static void ConvertRecordBatch(ExportedAllocationOwner sharedOwne cArray->length = batch.Length; cArray->offset = 0; cArray->null_count = 0; - cArray->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(ReleaseArray); + cArray->release = (delegate* unmanaged[Stdcall])s_releaseArray.Pointer; cArray->private_data = null; cArray->n_buffers = 1; diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs index a7ec40283a88a..2b4a5b216593b 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs @@ -23,9 +23,13 @@ namespace Apache.Arrow.C public static class CArrowArrayStreamExporter { private unsafe delegate int GetSchemaArrayStream(CArrowArrayStream* cArrayStream, CArrowSchema* cSchema); + private static unsafe NativeDelegate s_getSchemaArrayStream = new NativeDelegate(GetSchema); private unsafe delegate int GetNextArrayStream(CArrowArrayStream* cArrayStream, CArrowArray* cArray); + private static unsafe NativeDelegate s_getNextArrayStream = new NativeDelegate(GetNext); private unsafe delegate byte* GetLastErrorArrayStream(CArrowArrayStream* cArrayStream); + private static unsafe NativeDelegate s_getLastErrorArrayStream = new NativeDelegate(GetLastError); private unsafe delegate void ReleaseArrayStream(CArrowArrayStream* cArrayStream); + private static unsafe NativeDelegate s_releaseArrayStream = new NativeDelegate(Release); /// /// Export an to a . @@ -55,10 +59,10 @@ public static unsafe void ExportArrayStream(IArrowArrayStream arrayStream, CArro } cArrayStream->private_data = ExportedArrayStream.Export(arrayStream); - cArrayStream->get_schema = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(GetSchema); - cArrayStream->get_next = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(GetNext); - cArrayStream->get_last_error = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(GetLastError); - cArrayStream->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(Release); + cArrayStream->get_schema = (delegate* unmanaged[Stdcall])s_getSchemaArrayStream.Pointer; + cArrayStream->get_next = (delegate* unmanaged[Stdcall])s_getNextArrayStream.Pointer; + cArrayStream->get_last_error = (delegate* unmanaged[Stdcall])s_getLastErrorArrayStream.Pointer; + cArrayStream->release = (delegate* unmanaged[Stdcall])s_releaseArrayStream.Pointer; } private unsafe static int GetSchema(CArrowArrayStream* cArrayStream, CArrowSchema* cSchema) diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs index 75fb5fb9575ea..c2745c4cc4cc1 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs @@ -17,7 +17,6 @@ using System; using System.Collections.Generic; using System.IO; -using System.Linq; using System.Runtime.InteropServices; using Apache.Arrow.Types; @@ -25,6 +24,9 @@ namespace Apache.Arrow.C { public static class CArrowSchemaExporter { + private unsafe delegate void ReleaseArrowSchema(CArrowSchema* cArray); + private static unsafe readonly NativeDelegate s_releaseSchema = new NativeDelegate(ReleaseCArrowSchema); + /// /// Export a type to a . /// @@ -63,8 +65,7 @@ public static unsafe void ExportType(IArrowType datatype, CArrowSchema* schema) schema->dictionary = ConstructDictionary(datatype); - schema->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate( - ReleaseCArrowSchema); + schema->release = (delegate* unmanaged[Stdcall])s_releaseSchema.Pointer; schema->private_data = null; } diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs index 8e0b5e21b2383..a454ae6ba15b6 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs @@ -227,7 +227,7 @@ public ArrowType GetAsType() _ => throw new InvalidDataException($"Unsupported time unit for import: {format[2]}"), }; - string timezone = format.Split(':')[1]; + string timezone = format.Substring(format.IndexOf(':') + 1); return new TimestampType(timeUnit, timezone); } @@ -298,4 +298,4 @@ public Schema GetAsSchema() } } } -} \ No newline at end of file +} diff --git a/csharp/src/Apache.Arrow/C/NativeDelegate.cs b/csharp/src/Apache.Arrow/C/NativeDelegate.cs new file mode 100644 index 0000000000000..f67ea5aefb837 --- /dev/null +++ b/csharp/src/Apache.Arrow/C/NativeDelegate.cs @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Runtime.InteropServices; + +namespace Apache.Arrow.C +{ + internal struct NativeDelegate + { + private readonly T _managedDelegate; // For lifetime management + private readonly IntPtr _nativePointer; + + public NativeDelegate(T managedDelegate) + { + _managedDelegate = managedDelegate; + _nativePointer = Marshal.GetFunctionPointerForDelegate(managedDelegate); + } + + public IntPtr Pointer { get { return _nativePointer; } } + } +} diff --git a/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs b/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs index 0936bc17bbd4b..4d33f5a945b38 100644 --- a/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs +++ b/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs @@ -56,6 +56,7 @@ public void Dispose() _pointers[i] = IntPtr.Zero; } } + GC.SuppressFinalize(this); } } } diff --git a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs index e2bce374e9d3d..8f0210b28240f 100644 --- a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs +++ b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs @@ -28,11 +28,8 @@ public class NativeMemoryManager : MemoryManager, IOwnableAllocation private readonly INativeAllocationOwner _owner; public NativeMemoryManager(IntPtr ptr, int offset, int length) + : this(NativeMemoryAllocator.ExclusiveOwner, ptr, offset, length) { - _ptr = ptr; - _offset = offset; - _length = length; - _owner = NativeMemoryAllocator.ExclusiveOwner; } internal NativeMemoryManager(INativeAllocationOwner owner, IntPtr ptr, int offset, int length) diff --git a/csharp/src/Apache.Arrow/RecordBatch.cs b/csharp/src/Apache.Arrow/RecordBatch.cs index 6e97100686ee7..f87081d2987ac 100644 --- a/csharp/src/Apache.Arrow/RecordBatch.cs +++ b/csharp/src/Apache.Arrow/RecordBatch.cs @@ -18,6 +18,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using Apache.Arrow.Memory; namespace Apache.Arrow { @@ -86,5 +87,11 @@ internal RecordBatch(Schema schema, IMemoryOwner memoryOwner, List arrays = _arrays.Select(array => ArrowArrayFactory.BuildArray(array.Data.Clone(allocator))); + return new RecordBatch(Schema, arrays, Length); + } } } diff --git a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs index 9bb566220c5d2..7c3b6da8fc898 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs @@ -173,6 +173,7 @@ private static IArrowType ToArrowType(JsonArrowType type) "date" => ToDateArrowType(type), "time" => ToTimeArrowType(type), "timestamp" => ToTimestampArrowType(type), + "null" => NullType.Default, _ => throw new NotSupportedException($"JsonArrowType not supported: {type.Name}") }; } diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 13197bbe1669e..acfe72f83195e 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -293,7 +293,16 @@ private void CompareArrays(ListArray actualArray) Assert.Equal(expectedArray.Offset, actualArray.Offset); CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); - Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); + + if (_strictCompare) + { + Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); + } + else + { + int offsetsLength = (expectedArray.Length + 1) * 4; + Assert.True(expectedArray.ValueOffsetsBuffer.Span.Slice(0, offsetsLength).SequenceEqual(actualArray.ValueOffsetsBuffer.Span.Slice(0, offsetsLength))); + } actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); } diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs index 98cd5a9c5e973..7039ccdfe2dd1 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs @@ -68,8 +68,7 @@ public unsafe void CallsReleaseForValid() [Fact] public unsafe void CallsReleaseForInvalid() { - // Make sure we call release callback, even if the imported schema - // is invalid. + // Make sure we call release callback, even if the imported array is invalid. CArrowArray* cArray = CArrowArray.Create(); bool wasCalled = false; @@ -87,6 +86,8 @@ public unsafe void CallsReleaseForInvalid() }); Assert.True(wasCalled); CArrowArray.Free(cArray); + + GC.KeepAlive(releaseCallback); } } } diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 9ed2d3618372a..6485d215f27c7 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -574,6 +574,46 @@ public unsafe void ExportBatch() CArrowSchema.Free(cSchema); } + [SkippableFact] + public unsafe void RoundTripTestBatch() + { + RecordBatch batch1 = TestData.CreateSampleRecordBatch(4, createDictionaryArray: true, time64TimeUnit: TimeUnit.Microsecond); + RecordBatch batch2 = batch1.Clone(); + + CArrowArray* cExportArray = CArrowArray.Create(); + CArrowArrayExporter.ExportRecordBatch(batch1, cExportArray); + + CArrowSchema* cExportSchema = CArrowSchema.Create(); + CArrowSchemaExporter.ExportSchema(batch1.Schema, cExportSchema); + + CArrowArray* cImportArray = CArrowArray.Create(); + CArrowSchema* cImportSchema = CArrowSchema.Create(); + + // For Python, we need to provide the pointers + long exportArrayPtr = ((IntPtr)cExportArray).ToInt64(); + long exportSchemaPtr = ((IntPtr)cExportSchema).ToInt64(); + long importArrayPtr = ((IntPtr)cImportArray).ToInt64(); + long importSchemaPtr = ((IntPtr)cImportSchema).ToInt64(); + + using (Py.GIL()) + { + dynamic pa = Py.Import("pyarrow"); + dynamic exportedPyArray = pa.RecordBatch._import_from_c(exportArrayPtr, exportSchemaPtr); + exportedPyArray._export_to_c(importArrayPtr, importSchemaPtr); + } + + Schema schema = CArrowSchemaImporter.ImportSchema(cImportSchema); + RecordBatch importedBatch = CArrowArrayImporter.ImportRecordBatch(cImportArray, schema); + + ArrowReaderVerifier.CompareBatches(batch2, importedBatch, strictCompare: false); // Non-strict because span lengths won't match. + + // Since we allocated, we are responsible for freeing the pointer. + CArrowArray.Free(cExportArray); + CArrowSchema.Free(cExportSchema); + CArrowArray.Free(cImportArray); + CArrowSchema.Free(cImportSchema); + } + [SkippableFact] public unsafe void ExportBatchReader() { diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfaceSchemaTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfaceSchemaTests.cs index 357a18816cacc..a75d777ad318e 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfaceSchemaTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfaceSchemaTests.cs @@ -114,6 +114,8 @@ public unsafe void CallsReleaseForInvalid() }); Assert.True(wasCalled); CArrowSchema.Free(cSchema); + + GC.KeepAlive(releaseCallback); } } } diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index 96c6fafee270c..aa7d1279798c3 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -23,13 +23,15 @@ namespace Apache.Arrow.Tests { public static class TestData { - public static RecordBatch CreateSampleRecordBatch(int length, bool createDictionaryArray = false) + public static RecordBatch CreateSampleRecordBatch(int length, bool createDictionaryArray = false, TimeUnit? time64TimeUnit = null) { - return CreateSampleRecordBatch(length, columnSetCount: 1, createDictionaryArray); + return CreateSampleRecordBatch(length, columnSetCount: 1, createDictionaryArray, time64TimeUnit); } - public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount, bool createAdvancedTypeArrays) + public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount, bool createAdvancedTypeArrays, TimeUnit? time64TimeUnit = null) { + Time64Type time64Type = time64TimeUnit == null ? Time64Type.Default : new Time64Type(time64TimeUnit.Value); + Schema.Builder builder = new Schema.Builder(); for (int i = 0; i < columnSetCount; i++) { @@ -48,7 +50,7 @@ public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount builder.Field(CreateField(Date32Type.Default, i)); builder.Field(CreateField(Date64Type.Default, i)); builder.Field(CreateField(Time32Type.Default, i)); - builder.Field(CreateField(Time64Type.Default, i)); + builder.Field(CreateField(time64Type, i)); builder.Field(CreateField(TimestampType.Default, i)); builder.Field(CreateField(StringType.Default, i)); builder.Field(CreateField(new StructType(new List { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i)); From bfcab3abdb06f2391102bd239ef046c43634ce85 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Thu, 11 May 2023 00:09:56 +0000 Subject: [PATCH 09/13] Fix null integration test --- .../test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs index 7c3b6da8fc898..5b1ed98993812 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs @@ -329,9 +329,7 @@ public void Visit(Decimal256Type type) public void Visit(NullType type) { - var json = JsonFieldData.Data.GetRawText(); - int?[] values = JsonSerializer.Deserialize(json, s_options); - Array = new NullArray(values.Length); + Array = new NullArray(JsonFieldData.Count); } private ArrayData GetDecimalArrayData(FixedSizeBinaryType type) From 4eb9c1fc9cbee50abbdb9127064647232aa4ef83 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Thu, 11 May 2023 02:25:56 +0000 Subject: [PATCH 10/13] Fix Null type serialization to flatbuffers. --- csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index 016e379e5bbf0..400937669e309 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -221,7 +221,10 @@ public void Visit(FixedSizeBinaryType type) public void Visit(NullType type) { - Result = new FieldType(Flatbuf.Type.Null, 0); + Flatbuf.Null.StartNull(Builder); + Result = FieldType.Build( + Flatbuf.Type.Null, + Flatbuf.Null.EndNull(Builder)); } public void Visit(IArrowType type) From edc74af34463ac1ebc9a83e4d24d4cbf6358138d Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 15 May 2023 13:17:51 -0700 Subject: [PATCH 11/13] Make changes suggested by code review --- csharp/src/Apache.Arrow/C/CArrowArray.cs | 8 +- csharp/src/Apache.Arrow/C/NativeDelegate.cs | 2 +- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 1 + .../CDataInterfacePythonTests.cs | 75 ++++++++++++- .../test/Apache.Arrow.Tests/NullArrayTests.cs | 101 ------------------ docs/source/status.rst | 40 +++---- 6 files changed, 99 insertions(+), 128 deletions(-) diff --git a/csharp/src/Apache.Arrow/C/CArrowArray.cs b/csharp/src/Apache.Arrow/C/CArrowArray.cs index 37ff3760b6593..4aba95add0da4 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArray.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArray.cs @@ -71,14 +71,14 @@ public unsafe struct CArrowArray /// /// Do not call this on a pointer that was allocated elsewhere. /// - public static void Free(CArrowArray* schema) + public static void Free(CArrowArray* array) { - if (schema->release != null) + if (array->release != null) { // Call release if not already called. - schema->release(schema); + array->release(array); } - Marshal.FreeHGlobal((IntPtr)schema); + Marshal.FreeHGlobal((IntPtr)array); } } } diff --git a/csharp/src/Apache.Arrow/C/NativeDelegate.cs b/csharp/src/Apache.Arrow/C/NativeDelegate.cs index f67ea5aefb837..48b37fbfe5582 100644 --- a/csharp/src/Apache.Arrow/C/NativeDelegate.cs +++ b/csharp/src/Apache.Arrow/C/NativeDelegate.cs @@ -20,7 +20,7 @@ namespace Apache.Arrow.C { - internal struct NativeDelegate + internal readonly struct NativeDelegate { private readonly T _managedDelegate; // For lifetime management private readonly IntPtr _nativePointer; diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 92e5965c8ab88..5ffda3dfba6f5 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -159,6 +159,7 @@ public void Visit(DictionaryArray array) public void Visit(NullArray array) { + // There are no buffers for a NullArray } private void CreateBuffers(BooleanArray array) diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 6485d215f27c7..3d8dbf779f2a1 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -437,9 +437,14 @@ public unsafe void ImportArray() } ArrowType type = CArrowSchemaImporter.ImportType(cSchema); - IArrowArray importedArray = CArrowArrayImporter.ImportArray(cArray, type); + StringArray importedArray = (StringArray)CArrowArrayImporter.ImportArray(cArray, type); Assert.Equal(5, importedArray.Length); + Assert.Equal("hello", importedArray.GetString(0)); + Assert.Equal("world", importedArray.GetString(1)); + Assert.Null(importedArray.GetString(2)); + Assert.Equal("foo", importedArray.GetString(3)); + Assert.Equal("bar", importedArray.GetString(4)); } [SkippableFact] @@ -472,7 +477,7 @@ public unsafe void ImportRecordBatch() pa.array(List("foo", "bar")) ), }), - new[] { "col1", "col1", "col2", "col3", "col4", "col5", "col6" }); + new[] { "col1", "col2", "col3", "col4", "col5", "col6", "col7" }); dynamic batch = table.to_batches()[0]; @@ -485,6 +490,53 @@ public unsafe void ImportRecordBatch() RecordBatch recordBatch = CArrowArrayImporter.ImportRecordBatch(cArray, schema); Assert.Equal(5, recordBatch.Length); + + NullArray col1 = (NullArray)recordBatch.Column("col1"); + Assert.Equal(5, col1.Length); + + Int64Array col2 = (Int64Array)recordBatch.Column("col2"); + Assert.Equal(new long[] { 1, 2, 3, 0, 5 }, col2.Values.ToArray()); + Assert.False(col2.IsValid(3)); + + StringArray col3 = (StringArray)recordBatch.Column("col3"); + Assert.Equal(5, col3.Length); + Assert.Equal("hello", col3.GetString(0)); + Assert.Equal("world", col3.GetString(1)); + Assert.Null(col3.GetString(2)); + Assert.Equal("foo", col3.GetString(3)); + Assert.Equal("bar", col3.GetString(4)); + + DoubleArray col4 = (DoubleArray)recordBatch.Column("col4"); + Assert.Equal(new double[] { 0.0, 1.4, 2.5, 3.6, 4.7 }, col4.Values.ToArray()); + + ListArray col5 = (ListArray)recordBatch.Column("col5"); + Assert.Equal(new long[] { 1, 2, 3, 4, 5, 4, 3 }, ((Int64Array)col5.Values).Values.ToArray()); + Assert.Equal(new int[] { 0, 2, 4, 4, 4, 7 }, col5.ValueOffsets.ToArray()); + Assert.False(col5.IsValid(2)); + Assert.False(col5.IsValid(3)); + + StructArray col6 = (StructArray)recordBatch.Column("col6"); + Assert.Equal(5, col6.Length); + Int64Array col6a = (Int64Array)col6.Fields[0]; + Assert.Equal(new long[] { 10, 9, 0, 0, 0 }, col6a.Values.ToArray()); + StringArray col6b = (StringArray)col6.Fields[1]; + Assert.Equal("banana", col6b.GetString(0)); + Assert.Equal("apple", col6b.GetString(1)); + Assert.Equal("orange", col6b.GetString(2)); + Assert.Equal("cherry", col6b.GetString(3)); + Assert.Equal("grape", col6b.GetString(4)); + DoubleArray col6c = (DoubleArray)col6.Fields[2]; + Assert.Equal(new double[] { 0, 4.3, -9, 123.456, 0 }, col6c.Values.ToArray()); + + DictionaryArray col7 = (DictionaryArray)recordBatch.Column("col7"); + Assert.Equal(5, col7.Length); + Int64Array col7a = (Int64Array)col7.Indices; + Assert.Equal(new long[] { 1, 0, 1, 1, 0 }, col7a.Values.ToArray()); + Assert.False(col7a.IsValid(4)); + StringArray col7b = (StringArray)col7.Dictionary; + Assert.Equal(2, col7b.Length); + Assert.Equal("foo", col7b.GetString(0)); + Assert.Equal("bar", col7b.GetString(1)); } [SkippableFact] @@ -514,6 +566,25 @@ public unsafe void ImportArrayStream() var batch1 = stream.ReadNextRecordBatchAsync().Result; Assert.Equal(5, batch1.Length); + Int64Array col1 = (Int64Array)batch1.Column("col1"); + Assert.Equal(5, col1.Length); + Assert.Equal(1, col1.GetValue(0)); + Assert.Equal(2, col1.GetValue(1)); + Assert.Equal(3, col1.GetValue(2)); + Assert.Null(col1.GetValue(3)); + Assert.Equal(5, col1.GetValue(4)); + + StringArray col2 = (StringArray)batch1.Column("col2"); + Assert.Equal(5, col2.Length); + Assert.Equal("hello", col2.GetString(0)); + Assert.Equal("world", col2.GetString(1)); + Assert.Null(col2.GetString(2)); + Assert.Equal("foo", col2.GetString(3)); + Assert.Equal("bar", col2.GetString(4)); + + DoubleArray col3 = (DoubleArray)batch1.Column("col3"); + Assert.Equal(new double[] { 0.0, 1.4, 2.5, 3.6, 4.7 }, col3.Values.ToArray()); + var batch2 = stream.ReadNextRecordBatchAsync().Result; Assert.Null(batch2); } diff --git a/csharp/test/Apache.Arrow.Tests/NullArrayTests.cs b/csharp/test/Apache.Arrow.Tests/NullArrayTests.cs index 22c98dc0c59ba..8cb5b32b06988 100644 --- a/csharp/test/Apache.Arrow.Tests/NullArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/NullArrayTests.cs @@ -13,8 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; -using System.Linq; using Xunit; namespace Apache.Arrow.Tests @@ -58,105 +56,6 @@ public void ClearsArray() Assert.Equal(0, array.Length); } } - - public class Swap - { - [Fact] - public void SwapsExpectedBits() - { - var array = new BooleanArray.Builder() - .AppendRange(Enumerable.Repeat(false, 8)) - .Set(0, true) - .Swap(0, 7) - .Build(); - - Assert.True(array.GetValue(0).HasValue); - Assert.False(array.GetValue(0).Value); - Assert.True(array.GetValue(7).HasValue); - Assert.True(array.GetValue(7).Value); - #pragma warning disable CS0618 - Assert.False(array.GetBoolean(0)); - Assert.True(array.GetBoolean(7)); - #pragma warning restore CS0618 - } - - [Fact] - public void ThrowsWhenIndexOutOfRange() - { - Assert.Throws(() => - { - var builder = new BooleanArray.Builder(); - builder.Swap(0, 1); - }); - } - } - - public class Set - { - [Theory] - [InlineData(8, 0)] - [InlineData(8, 4)] - [InlineData(8, 7)] - [InlineData(16, 8)] - [InlineData(16, 15)] - public void SetsExpectedBitToTrue(int length, int index) - { - var array = new BooleanArray.Builder() - .Resize(length) - .Set(index, true) - .Build(); - - Assert.True(array.GetValue(index).Value); - } - - [Theory] - [InlineData(8, 0)] - [InlineData(8, 4)] - [InlineData(8, 7)] - [InlineData(16, 8)] - [InlineData(16, 15)] - public void SetsExpectedBitsToFalse(int length, int index) - { - var array = new BooleanArray.Builder() - .Resize(length) - .Set(index, false) - .Build(); - - Assert.False(array.GetValue(index).Value); - } - - [Theory] - [InlineData(4)] - public void UnsetBitsAreUnchanged(int index) - { - var array = new BooleanArray.Builder() - .AppendRange(Enumerable.Repeat(false, 8)) - .Set(index, true) - .Build(); - - for (var i = 0; i < 8; i++) - { - if (i != index) - { - Assert.True(array.GetValue(i).HasValue); - Assert.False(array.GetValue(i).Value); - #pragma warning disable CS0618 - Assert.False(array.GetBoolean(i)); - #pragma warning restore CS0618 - } - } - } - - [Fact] - public void ThrowsWhenIndexOutOfRange() - { - Assert.Throws(() => - { - var builder = new BooleanArray.Builder(); - builder.Set(builder.Length, false); - }); - } - } } } } diff --git a/docs/source/status.rst b/docs/source/status.rst index 930ce07169ad4..21afc32efe16b 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -290,18 +290,18 @@ support/not support individual features. C Data Interface ================ -+-----------------------------+-----+--------+---+------+----+------+--------+------+-------+ -| Feature | C++ | Python | R | Rust | Go | Java | C/GLib | Ruby | Julia | -| | | | | | | | | | | -+=============================+=====+========+===+======+====+======+========+======+=======+ -| Schema export | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | -+-----------------------------+-----+--------+---+------+----+------+--------+------+-------+ -| Array export | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | -+-----------------------------+-----+--------+---+------+----+------+--------+------+-------+ -| Schema import | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | -+-----------------------------+-----+--------+---+------+----+------+--------+------+-------+ -| Array import | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | -+-----------------------------+-----+--------+---+------+----+------+--------+------+-------+ ++-----------------------------+-----+--------+---+------+----+------+--------+------+-------+-----+ +| Feature | C++ | Python | R | Rust | Go | Java | C/GLib | Ruby | Julia | C# | +| | | | | | | | | | | | ++=============================+=====+========+===+======+====+======+========+======+=======+=====+ +| Schema export | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ++-----------------------------+-----+--------+---+------+----+------+--------+------+-------+-----+ +| Array export | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ++-----------------------------+-----+--------+---+------+----+------+--------+------+-------+-----+ +| Schema import | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ++-----------------------------+-----+--------+---+------+----+------+--------+------+-------+-----+ +| Array import | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ++-----------------------------+-----+--------+---+------+----+------+--------+------+-------+-----+ .. seealso:: The :ref:`C Data Interface ` specification. @@ -310,14 +310,14 @@ C Data Interface C Stream Interface ================== -+-----------------------------+-----+--------+---+------+----+------+--------+------+-------+ -| Feature | C++ | Python | R | Rust | Go | Java | C/GLib | Ruby | Julia | -| | | | | | | | | | | -+=============================+=====+========+===+======+====+======+========+======+=======+ -| Stream export | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | -+-----------------------------+-----+--------+---+------+----+------+--------+------+-------+ -| Stream import | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | -+-----------------------------+-----+--------+---+------+----+------+--------+------+-------+ ++-----------------------------+-----+--------+---+------+----+------+--------+------+-------+-----+ +| Feature | C++ | Python | R | Rust | Go | Java | C/GLib | Ruby | Julia | C# | +| | | | | | | | | | | | ++=============================+=====+========+===+======+====+======+========+======+=======+=====+ +| Stream export | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | ++-----------------------------+-----+--------+---+------+----+------+--------+------+-------+-----+ +| Stream import | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | ++-----------------------------+-----+--------+---+------+----+------+--------+------+-------+-----+ .. seealso:: The :ref:`C Stream Interface ` specification. From ad7907bd1f0cd6afa88a3efcf28c522fd2837aa7 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Wed, 17 May 2023 16:19:45 +0000 Subject: [PATCH 12/13] Changed the default Time64Type to have a valid time unit. --- csharp/src/Apache.Arrow/Types/Time64Type.cs | 2 +- .../Apache.Arrow.Tests/CDataInterfacePythonTests.cs | 2 +- csharp/test/Apache.Arrow.Tests/TestData.cs | 10 ++++------ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/csharp/src/Apache.Arrow/Types/Time64Type.cs b/csharp/src/Apache.Arrow/Types/Time64Type.cs index 5d6c2e46e1b56..2777deb228ed8 100644 --- a/csharp/src/Apache.Arrow/Types/Time64Type.cs +++ b/csharp/src/Apache.Arrow/Types/Time64Type.cs @@ -24,7 +24,7 @@ public sealed class Time64Type : TimeType public override string Name => "time64"; public override int BitWidth => 64; - public Time64Type(TimeUnit unit = TimeUnit.Millisecond) + public Time64Type(TimeUnit unit = TimeUnit.Nanosecond) : base(unit) { } public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 6485d215f27c7..720e395337b39 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -577,7 +577,7 @@ public unsafe void ExportBatch() [SkippableFact] public unsafe void RoundTripTestBatch() { - RecordBatch batch1 = TestData.CreateSampleRecordBatch(4, createDictionaryArray: true, time64TimeUnit: TimeUnit.Microsecond); + RecordBatch batch1 = TestData.CreateSampleRecordBatch(4, createDictionaryArray: true); RecordBatch batch2 = batch1.Clone(); CArrowArray* cExportArray = CArrowArray.Create(); diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index aa7d1279798c3..96c6fafee270c 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -23,15 +23,13 @@ namespace Apache.Arrow.Tests { public static class TestData { - public static RecordBatch CreateSampleRecordBatch(int length, bool createDictionaryArray = false, TimeUnit? time64TimeUnit = null) + public static RecordBatch CreateSampleRecordBatch(int length, bool createDictionaryArray = false) { - return CreateSampleRecordBatch(length, columnSetCount: 1, createDictionaryArray, time64TimeUnit); + return CreateSampleRecordBatch(length, columnSetCount: 1, createDictionaryArray); } - public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount, bool createAdvancedTypeArrays, TimeUnit? time64TimeUnit = null) + public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount, bool createAdvancedTypeArrays) { - Time64Type time64Type = time64TimeUnit == null ? Time64Type.Default : new Time64Type(time64TimeUnit.Value); - Schema.Builder builder = new Schema.Builder(); for (int i = 0; i < columnSetCount; i++) { @@ -50,7 +48,7 @@ public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount builder.Field(CreateField(Date32Type.Default, i)); builder.Field(CreateField(Date64Type.Default, i)); builder.Field(CreateField(Time32Type.Default, i)); - builder.Field(CreateField(time64Type, i)); + builder.Field(CreateField(Time64Type.Default, i)); builder.Field(CreateField(TimestampType.Default, i)); builder.Field(CreateField(StringType.Default, i)); builder.Field(CreateField(new StructType(new List { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i)); From f816507af2fd690ebe9025fa85d479b0e6f2fe2b Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Wed, 17 May 2023 20:39:24 +0000 Subject: [PATCH 13/13] Changes suggested by code review. --- csharp/src/Apache.Arrow/Arrays/NullArray.cs | 13 +++++-------- .../src/Apache.Arrow/C/CArrowArrayStreamImporter.cs | 10 ++++++---- .../Apache.Arrow/Memory/ExportedAllocationOwner.cs | 2 ++ .../Apache.Arrow/Memory/ImportedAllocationOwner.cs | 2 +- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/NullArray.cs b/csharp/src/Apache.Arrow/Arrays/NullArray.cs index 49c197485bc5b..762540065c929 100644 --- a/csharp/src/Apache.Arrow/Arrays/NullArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/NullArray.cs @@ -70,20 +70,17 @@ public Builder Resize(int length) _length = length; return this; } - - private void CheckIndex(int index) - { - if (index < 0 || index >= Length) - { - throw new ArgumentOutOfRangeException(nameof(index)); - } - } } public ArrayData Data { get; } public NullArray(ArrayData data) { + if (data.Length != data.NullCount) + { + throw new ArgumentException("Length must equal null count", nameof(data)); + } + data.EnsureDataType(ArrowTypeId.Null); data.EnsureBufferCount(0); Data = data; diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs index 1bfa90ac5c9e4..4a374388282a0 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs @@ -68,9 +68,10 @@ public ImportedArrowArrayStream(CArrowArrayStream* cArrayStream) CArrowSchema* cSchema = CArrowSchema.Create(); try { - if (_cArrayStream->get_schema(_cArrayStream, cSchema) != 0) + int errno = _cArrayStream->get_schema(_cArrayStream, cSchema); + if (errno != 0) { - throw new Exception("This needs to be better"); + throw new Exception($"Unexpected error recieved from external stream. Errno: {errno}"); } _schema = CArrowSchemaImporter.ImportSchema(cSchema); } @@ -101,9 +102,10 @@ public ValueTask ReadNextRecordBatchAsync(CancellationToken cancell CArrowArray* cArray = CArrowArray.Create(); try { - if (_cArrayStream->get_next(_cArrayStream, cArray) != 0) + int errno = _cArrayStream->get_next(_cArrayStream, cArray); + if (errno != 0) { - throw new Exception("This too needs to be better"); + throw new Exception($"Unexpected error recieved from external stream. Errno: {errno}"); } if (cArray->release != null) { diff --git a/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs b/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs index 4d33f5a945b38..e872dc5425e06 100644 --- a/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs +++ b/csharp/src/Apache.Arrow/Memory/ExportedAllocationOwner.cs @@ -31,6 +31,7 @@ internal sealed class ExportedAllocationOwner : INativeAllocationOwner, IDisposa public IntPtr Allocate(int size) { + GC.AddMemoryPressure(size); return Acquire(Marshal.AllocHGlobal(size), 0, size); } @@ -56,6 +57,7 @@ public void Dispose() _pointers[i] = IntPtr.Zero; } } + GC.RemoveMemoryPressure(_allocationSize); GC.SuppressFinalize(this); } } diff --git a/csharp/src/Apache.Arrow/Memory/ImportedAllocationOwner.cs b/csharp/src/Apache.Arrow/Memory/ImportedAllocationOwner.cs index a3305718bc480..b29135d5aeb6d 100644 --- a/csharp/src/Apache.Arrow/Memory/ImportedAllocationOwner.cs +++ b/csharp/src/Apache.Arrow/Memory/ImportedAllocationOwner.cs @@ -38,9 +38,9 @@ public IMemoryOwner AddMemory(IntPtr ptr, int offset, int length) NativeMemoryManager memory = new NativeMemoryManager(this, ptr, offset, length); Interlocked.Increment(ref _referenceCount); - Interlocked.Add(ref _managedMemory, length); if (length > 0) { + Interlocked.Add(ref _managedMemory, length); GC.AddMemoryPressure(length); } return memory;