Skip to content

Commit

Permalink
GH-33856: [C#] Implement C Data Interface for C# (#35496)
Browse files Browse the repository at this point in the history
### Rationale for this change

This continues implementing the C Data Interface for C# with integration for `ArrowArray`, `RecordBatch` and streams.

### What changes are included in this PR?

- Adds classes `CArrowArray` and `CArrowStream` to represent the C API structures.
- Adds interface `IArrowArrayStream` to represent an array stream or record batch reader.
- Adds classes `CArrowArrayImporter`, `CArrowArrayExporter`, `CArrowArrayStreamImporter` and `CArrowArrayStreamExporter` to marshal between C# and C representations.
- Augments the native memory representation to support (reasonably safe) ownership of memory by external code.

### Are these changes tested?

Yes. Testing is largely done via the Python C API interface.

### Are there any user-facing changes?

Yes, this adds new user-facing APIs to import and export C# structures using the C API.

**This PR includes breaking changes to public APIs.**

The default time unit for Time64Type was previously milliseconds. This does not appear to be valid, so it has been changed to nanoseconds.

- Closes: #33856
- Closes: #33857
* Closes: #33856

Authored-by: Curt Hagenlocher <[email protected]>
Signed-off-by: Eric Erhardt <[email protected]>
  • Loading branch information
CurtHagenlocher authored May 22, 2023
1 parent 41ba4fe commit 0dca449
Show file tree
Hide file tree
Showing 37 changed files with 2,113 additions and 55 deletions.
14 changes: 14 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/ArrayData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow.Memory;
using Apache.Arrow.Types;
using FlatBuffers;
using System;
using System.Collections.Generic;
using System.Linq;
Expand Down Expand Up @@ -111,5 +113,17 @@ public ArrayData Slice(int offset, int length)

return new ArrayData(DataType, length, RecalculateNullCount, offset, Buffers, Children, Dictionary);
}

/// <summary>
/// Builds a new <see cref="ArrayData"/> with the same type, length, null count and offset,
/// whose buffers, children and dictionary are themselves clones of this instance's.
/// </summary>
/// <param name="allocator">Forwarded unchanged to every nested <c>Clone</c> call.</param>
/// <returns>The newly constructed copy.</returns>
public ArrayData Clone(MemoryAllocator allocator = default)
{
    // A null Buffers/Children/Dictionary stays null in the copy.
    var clonedBuffers = Buffers?.Select(buffer => buffer.Clone(allocator)).ToArray();
    var clonedChildren = Children?.Select(child => child.Clone(allocator)).ToArray();
    var clonedDictionary = Dictionary?.Clone(allocator);

    return new ArrayData(
        DataType,
        Length,
        NullCount,
        Offset,
        clonedBuffers,
        clonedChildren,
        clonedDictionary);
}
}
}
2 changes: 2 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ public static IArrowArray BuildArray(ArrayData data)
{
switch (data.DataType.TypeId)
{
case ArrowTypeId.Null:
return new NullArray(data);
case ArrowTypeId.Boolean:
return new BooleanArray(data);
case ArrowTypeId.UInt8:
Expand Down
106 changes: 106 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/NullArray.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using Apache.Arrow.Types;
using Apache.Arrow.Memory;

namespace Apache.Arrow
{
/// <summary>
/// An array whose every slot is null. It is backed by an <see cref="ArrayData"/>
/// that has no buffers and whose null count equals its length.
/// </summary>
public class NullArray : IArrowArray
{
    /// <summary>
    /// Builder for <see cref="NullArray"/>. The only state it keeps is the number of slots.
    /// </summary>
    public class Builder : IArrowArrayBuilder<NullArray, Builder>
    {
        /// <summary>Number of (null) slots appended so far.</summary>
        public int Length { get; private set; }

        /// <summary>Always equal to <see cref="Length"/>.</summary>
        public int Capacity
        {
            get { return Length; }
        }

        /// <summary>Always equal to <see cref="Length"/>.</summary>
        public int NullCount
        {
            get { return Length; }
        }

        public Builder()
        {
        }

        /// <summary>Adds one null slot.</summary>
        /// <returns>This builder.</returns>
        public Builder AppendNull()
        {
            Length += 1;
            return this;
        }

        /// <summary>Creates a <see cref="NullArray"/> of the current length.</summary>
        /// <param name="allocator">Not used by this builder.</param>
        public NullArray Build(MemoryAllocator allocator = default)
        {
            return new NullArray(Length);
        }

        /// <summary>Resets the length to zero.</summary>
        /// <returns>This builder.</returns>
        public Builder Clear()
        {
            Length = 0;
            return this;
        }

        /// <summary>Validates <paramref name="capacity"/>; otherwise has no effect.</summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="capacity"/> is negative.</exception>
        public Builder Reserve(int capacity)
        {
            if (capacity >= 0)
            {
                return this;
            }

            throw new ArgumentOutOfRangeException(nameof(capacity));
        }

        /// <summary>Sets the number of slots to <paramref name="length"/>.</summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
        public Builder Resize(int length)
        {
            if (length >= 0)
            {
                Length = length;
                return this;
            }

            throw new ArgumentOutOfRangeException(nameof(length));
        }
    }

    /// <summary>The underlying array data.</summary>
    public ArrayData Data { get; }

    /// <summary>
    /// Wraps existing array data after checking that it describes an all-null array.
    /// </summary>
    /// <exception cref="ArgumentException">The data's length differs from its null count.</exception>
    public NullArray(ArrayData data)
    {
        // The length/null-count comparison runs first, before the type and buffer-count checks.
        bool everySlotIsNull = data.Length == data.NullCount;
        if (!everySlotIsNull)
        {
            throw new ArgumentException("Length must equal null count", nameof(data));
        }

        data.EnsureDataType(ArrowTypeId.Null);
        data.EnsureBufferCount(0);
        Data = data;
    }

    /// <summary>
    /// Creates an all-null array with <paramref name="length"/> slots and an empty buffer list.
    /// </summary>
    public NullArray(int length)
        : this(new ArrayData(NullType.Default, length, length, buffers: System.Array.Empty<ArrowBuffer>()))
    {
    }

    public int Length
    {
        get { return Data.Length; }
    }

    public int Offset
    {
        get { return Data.Offset; }
    }

    public int NullCount
    {
        get { return Data.NullCount; }
    }

    /// <summary>Does nothing.</summary>
    public void Dispose()
    {
    }

    /// <summary>Always true; <paramref name="index"/> is not inspected.</summary>
    public bool IsNull(int index)
    {
        return true;
    }

    /// <summary>Always false; <paramref name="index"/> is not inspected.</summary>
    public bool IsValid(int index)
    {
        return false;
    }

    /// <summary>Forwards this array and <paramref name="visitor"/> to <c>Array.Accept</c>.</summary>
    public void Accept(IArrowArrayVisitor visitor)
    {
        Array.Accept(this, visitor);
    }
}
}
21 changes: 20 additions & 1 deletion csharp/src/Apache.Arrow/ArrowBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public ReadOnlySpan<byte> Span

/// <summary>
/// Returns a copy of this buffer.
/// </summary>
/// <param name="allocator">Passed to the builder's <c>Build</c> call when bytes are copied.</param>
/// <returns>
/// <c>Empty</c> when this buffer's span has length zero; otherwise a buffer built from
/// the bytes of <c>Span</c>.
/// </returns>
public ArrowBuffer Clone(MemoryAllocator allocator = default)
{
    // Nothing to copy for a zero-length span, so skip the builder entirely.
    if (Span.Length == 0)
    {
        return Empty;
    }

    return new Builder<byte>(Span.Length)
        .Append(Span)
        .Build(allocator);
}
Expand All @@ -72,5 +72,24 @@ public void Dispose()
{
_memoryOwner?.Dispose();
}

/// <summary>
/// Tries to obtain a raw pointer for this buffer and register the allocation with
/// <paramref name="newOwner"/>.
/// </summary>
/// <param name="newOwner">Receives the acquired allocation via its <c>Acquire</c> method.</param>
/// <param name="ptr">
/// On success, the acquired pointer advanced by the acquired offset, or
/// <see cref="IntPtr.Zero"/> for an empty buffer with no memory owner.
/// On failure, <see cref="IntPtr.Zero"/>.
/// </param>
/// <returns>
/// True for an empty, owner-less buffer or when the memory owner is an
/// <c>IOwnableAllocation</c> whose <c>TryAcquire</c> succeeds; false otherwise.
/// </returns>
internal bool TryExport(ExportedAllocationOwner newOwner, out IntPtr ptr)
{
    bool nothingToExport = _memoryOwner == null && IsEmpty;
    if (nothingToExport)
    {
        ptr = IntPtr.Zero;
        return true;
    }

    if (_memoryOwner is IOwnableAllocation ownable)
    {
        if (ownable.TryAcquire(out ptr, out int offset, out int length))
        {
            // Register the base pointer with the new owner, then hand back the offset pointer.
            newOwner.Acquire(ptr, offset, length);
            ptr = ptr + offset;
            return true;
        }
    }

    ptr = IntPtr.Zero;
    return false;
}
}
}
84 changes: 84 additions & 0 deletions csharp/src/Apache.Arrow/C/CArrowArray.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

using System;
using System.Runtime.InteropServices;

namespace Apache.Arrow.C
{
/// <summary>
/// An Arrow C Data Interface Array, which represents the data in an exported array or record batch.
/// </summary>
/// <remarks>
/// This is used to export <see cref="RecordBatch"/> or <see cref="IArrowArray"/> to other languages. It matches
/// the layout of the ArrowArray struct described in https://github.com/apache/arrow/blob/main/cpp/src/arrow/c/abi.h.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public unsafe struct CArrowArray
{
    // Field order and types must stay in step with the C ArrowArray struct:
    // the layout is sequential, so reordering fields changes the binary layout.
    public long length;
    public long null_count;
    public long offset;
    public long n_buffers;
    public long n_children;
    public byte** buffers;
    public CArrowArray** children;
    public CArrowArray* dictionary;
    public delegate* unmanaged[Stdcall]<CArrowArray*, void> release;
    public void* private_data;

    /// <summary>
    /// Allocate and zero-initialize an unmanaged pointer of this type.
    /// </summary>
    /// <remarks>
    /// This pointer must later be freed by <see cref="Free"/>.
    /// </remarks>
    /// <returns>A pointer to a newly allocated struct with every field zero/null.</returns>
    public static CArrowArray* Create()
    {
        var ptr = (CArrowArray*)Marshal.AllocHGlobal(sizeof(CArrowArray));

        // AllocHGlobal does not clear memory; assigning the default value zeroes
        // every field at once, including any added to the struct later.
        *ptr = default;

        return ptr;
    }

    /// <summary>
    /// Free a pointer that was allocated in <see cref="Create"/>.
    /// </summary>
    /// <remarks>
    /// Do not call this on a pointer that was allocated elsewhere.
    /// If the struct still has a non-null release callback, it is invoked before
    /// the memory is freed. Passing a null pointer is a no-op.
    /// </remarks>
    /// <param name="array">The pointer to free; may be null.</param>
    public static void Free(CArrowArray* array)
    {
        // Guard first: the release check below would dereference a null pointer.
        if (array == null)
        {
            return;
        }

        if (array->release != null)
        {
            // Call release if not already called.
            array->release(array);
        }

        Marshal.FreeHGlobal((IntPtr)array);
    }
}
}
Loading

0 comments on commit 0dca449

Please sign in to comment.