Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-35809: [C#] Improvements to the C Data Interface #35810

Merged
merged 15 commits into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions csharp/src/Apache.Arrow/Apache.Arrow.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,8 @@
<ItemGroup Condition="!$([MSBuild]::IsTargetFrameworkCompatible($(TargetFramework), 'net5.0'))">
<Compile Remove="Arrays\HalfFloatArray.cs" />
</ItemGroup>
<ItemGroup Condition="$([MSBuild]::IsTargetFrameworkCompatible($(TargetFramework), 'net5.0'))">
<!-- Code targeting .NET 5+ should use [UnmanagedCallersOnly]. -->
<Compile Remove="C\NativeDelegate.cs" />
</ItemGroup>
</Project>
11 changes: 1 addition & 10 deletions csharp/src/Apache.Arrow/C/CArrowArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,7 @@ public unsafe struct CArrowArray
{
var ptr = (CArrowArray*)Marshal.AllocHGlobal(sizeof(CArrowArray));

ptr->length = 0;
ptr->n_buffers = 0;
ptr->offset = 0;
ptr->buffers = null;
ptr->n_children = 0;
ptr->children = null;
ptr->dictionary = null;
ptr->null_count = 0;
ptr->release = null;
ptr->private_data = null;
*ptr = default;

return ptr;
}
Expand Down
28 changes: 18 additions & 10 deletions csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,21 @@


using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Apache.Arrow.Memory;

namespace Apache.Arrow.C
{
public static class CArrowArrayExporter
{
#if NET5_0_OR_GREATER
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps add a comment explaining why this needs .Net 5.0+? Or will it be obvious to a C# developer?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UnmanagedCallersOnlyAttribute was introduced in .NET 5.

private static unsafe delegate* unmanaged[Stdcall]<CArrowArray*, void> ReleaseArrayPtr => &ReleaseArray;
#else
private unsafe delegate void ReleaseArrowArray(CArrowArray* cArray);
private static unsafe readonly NativeDelegate<ReleaseArrowArray> s_releaseArray = new NativeDelegate<ReleaseArrowArray>(ReleaseArray);

private static unsafe delegate* unmanaged[Stdcall]<CArrowArray*, void> ReleaseArrayPtr => (delegate* unmanaged[Stdcall]<CArrowArray*, void>)s_releaseArray.Pointer;
#endif
/// <summary>
/// Export an <see cref="IArrowArray"/> to a <see cref="CArrowArray"/>. Whether or not the
/// export succeeds, the original array becomes invalid. Clone an array to continue using it
Expand Down Expand Up @@ -58,7 +63,7 @@ public static unsafe void ExportArray(IArrowArray array, CArrowArray* cArray)
try
{
ConvertArray(allocationOwner, array.Data, cArray);
cArray->release = (delegate* unmanaged[Stdcall]<CArrowArray*, void>)(IntPtr)s_releaseArray.Pointer;
cArray->release = ReleaseArrayPtr;
cArray->private_data = FromDisposable(allocationOwner);
allocationOwner = null;
}
Expand Down Expand Up @@ -101,7 +106,7 @@ public static unsafe void ExportRecordBatch(RecordBatch batch, CArrowArray* cArr
try
{
ConvertRecordBatch(allocationOwner, batch, cArray);
cArray->release = (delegate* unmanaged[Stdcall]<CArrowArray*, void>)s_releaseArray.Pointer;
cArray->release = ReleaseArrayPtr;
cArray->private_data = FromDisposable(allocationOwner);
allocationOwner = null;
}
Expand All @@ -116,7 +121,7 @@ private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, Arr
cArray->length = array.Length;
cArray->offset = array.Offset;
cArray->null_count = array.NullCount;
cArray->release = (delegate* unmanaged[Stdcall]<CArrowArray*, void>)s_releaseArray.Pointer;
cArray->release = ReleaseArrayPtr;
cArray->private_data = null;

cArray->n_buffers = array.Buffers?.Length ?? 0;
Expand Down Expand Up @@ -161,7 +166,7 @@ private unsafe static void ConvertRecordBatch(ExportedAllocationOwner sharedOwne
cArray->length = batch.Length;
cArray->offset = 0;
cArray->null_count = 0;
cArray->release = (delegate* unmanaged[Stdcall]<CArrowArray*, void>)s_releaseArray.Pointer;
cArray->release = ReleaseArrayPtr;
cArray->private_data = null;

cArray->n_buffers = 1;
Expand All @@ -184,13 +189,12 @@ private unsafe static void ConvertRecordBatch(ExportedAllocationOwner sharedOwne
cArray->dictionary = null;
}

#if NET5_0_OR_GREATER
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't the default calling convention be used instead? I'm not sure stdcall is ok on non-Windows platforms.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #34133 (comment). Ideally we would have used the default calling convention, but that would not be supported on anything earlier than .NET 5. And on 64-bit platforms the calling convention doesn't matter either way.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, the entire point (mostly) of implementing the C Data Interface is to be compatible with non-.Net producers/consumers. Those are extremely likely to use the platform default. So we should get it right at least when possible, i.e. on .Net >= 5.0.

As for https://stackoverflow.com/questions/34832679/is-the-callingconvention-ignored-in-64-bit-net-applications , does it apply here? It's talking about DllImport, which might be different from this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In any case, keeping the default convention seems more theoretically sound (and forward-looking, perhaps).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @westonpace @lidavidm for opinions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand correctly, are you saying to change this:

public delegate* unmanaged[Stdcall]<CArrowArray*, void> release;

into this?

#if NET5_0_OR_GREATER
public delegate* unmanaged<CArrowArray*, void> release;
#else
public delegate* unmanaged[Stdcall]<CArrowArray*, void> release;
#endif

That would cause an incompatible API surface between the assembly compiled for .NET 6 and that compiled for the earlier frameworks. We have two options:

  • Lie and keep the stdcall calling convention on the function pointers.
  • Use the default unmanaged calling convention but support the C interface only on .NET 6+ (we don't target 5 as it is unsupported).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, why is this release member public here? It will be exposed to C Data Interface consumers as the release pointer, but needn't (and probably shouldn't) be part of the Arrow C# API.

Arrow C# API users should only see the high-level import and export methods such as ImportArray and ExportArray.

(an important thing to understand is that the C Data Interface is a binary interface, not an API)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you take a look for example at the Arrow C++ implementation, its release functions are entirely private. For example ReleaseExportedSchema below is in the anonymous namespace, which doesn't expose the function publicly:

void ReleaseExportedSchema(struct ArrowSchema* schema) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I should make the members of these structs private? That also works.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there were some reason for the fields to stay public, this one could also probably be defined as a union between a public IntPtr and a private delegate. (I don't know why the fields would need to be public; I'm pretty sure I just followed the pattern that was present for schemas.)

#endif
private unsafe static void ReleaseArray(CArrowArray* cArray)
{
if (cArray->private_data != null)
{
Dispose(&cArray->private_data);
}
cArray->private_data = null;
Dispose(&cArray->private_data);
cArray->release = null;
}

Expand All @@ -203,6 +207,10 @@ private unsafe static void ReleaseArray(CArrowArray* cArray)
private unsafe static void Dispose(void** ptr)
{
GCHandle gch = GCHandle.FromIntPtr((IntPtr)(*ptr));
if (!gch.IsAllocated)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is effectively a no-op. If the pointer was null, the previous line will throw InvalidOperationException and if it wasn't null then IsAllocated will return true.

The overall change here also means that calling ReleaseArray twice will now throw an exception instead of the second call being a no-op.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment explaining when this might occur? (perhaps if an exception occurs while exporting the array?)

{
return;
}
((IDisposable)gch.Target).Dispose();
gch.Free();
*ptr = null;
Expand Down
2 changes: 1 addition & 1 deletion csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type)
case ArrowTypeId.Map:
break;
case ArrowTypeId.Null:
buffers = new ArrowBuffer[0];
buffers = System.Array.Empty<ArrowBuffer>();
break;
case ArrowTypeId.Dictionary:
DictionaryType dictionaryType = (DictionaryType)type;
Expand Down
6 changes: 1 addition & 5 deletions csharp/src/Apache.Arrow/C/CArrowArrayStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,7 @@ public unsafe struct CArrowArrayStream
{
var ptr = (CArrowArrayStream*)Marshal.AllocHGlobal(sizeof(CArrowArrayStream));

ptr->get_schema = null;
ptr->get_next = null;
ptr->get_last_error = null;
ptr->release = null;
ptr->private_data = null;
*ptr = default;

return ptr;
}
Expand Down
53 changes: 46 additions & 7 deletions csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,37 @@


using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Apache.Arrow.Ipc;

namespace Apache.Arrow.C
{
public static class CArrowArrayStreamExporter
{
#if NET5_0_OR_GREATER
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowSchema*, int> GetSchemaPtr => &GetSchema;
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowArray*, int> GetNextPtr => &GetNext;
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, byte*> GetLastErrorPtr => &GetLastError;
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, void> ReleasePtr => &Release;
#else
private unsafe delegate int GetSchemaArrayStream(CArrowArrayStream* cArrayStream, CArrowSchema* cSchema);
private static unsafe NativeDelegate<GetSchemaArrayStream> s_getSchemaArrayStream = new NativeDelegate<GetSchemaArrayStream>(GetSchema);
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowSchema*, int> GetSchemaPtr =>
(delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowSchema*, int>)s_getSchemaArrayStream.Pointer;
private unsafe delegate int GetNextArrayStream(CArrowArrayStream* cArrayStream, CArrowArray* cArray);
private static unsafe NativeDelegate<GetNextArrayStream> s_getNextArrayStream = new NativeDelegate<GetNextArrayStream>(GetNext);
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowArray*, int> GetNextPtr =>
(delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowArray*, int>)s_getNextArrayStream.Pointer;
private unsafe delegate byte* GetLastErrorArrayStream(CArrowArrayStream* cArrayStream);
private static unsafe NativeDelegate<GetLastErrorArrayStream> s_getLastErrorArrayStream = new NativeDelegate<GetLastErrorArrayStream>(GetLastError);
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, byte*> GetLastErrorPtr =>
(delegate* unmanaged[Stdcall]<CArrowArrayStream*, byte*>)s_getLastErrorArrayStream.Pointer;
private unsafe delegate void ReleaseArrayStream(CArrowArrayStream* cArrayStream);
private static unsafe NativeDelegate<ReleaseArrayStream> s_releaseArrayStream = new NativeDelegate<ReleaseArrayStream>(Release);
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, void> ReleasePtr =>
(delegate* unmanaged[Stdcall]<CArrowArrayStream*, void>)s_releaseArrayStream.Pointer;
#endif

/// <summary>
/// Export an <see cref="IArrowArrayStream"/> to a <see cref="CArrowArrayStream"/>.
Expand Down Expand Up @@ -59,12 +75,15 @@ public static unsafe void ExportArrayStream(IArrowArrayStream arrayStream, CArro
}

cArrayStream->private_data = ExportedArrayStream.Export(arrayStream);
cArrayStream->get_schema = (delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowSchema*, int>)s_getSchemaArrayStream.Pointer;
cArrayStream->get_next = (delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowArray*, int>)s_getNextArrayStream.Pointer;
cArrayStream->get_last_error = (delegate* unmanaged[Stdcall]<CArrowArrayStream*, byte*>)s_getLastErrorArrayStream.Pointer;
cArrayStream->release = (delegate* unmanaged[Stdcall]<CArrowArrayStream*, void>)s_releaseArrayStream.Pointer;
cArrayStream->get_schema = GetSchemaPtr;
cArrayStream->get_next = GetNextPtr;
cArrayStream->get_last_error = GetLastErrorPtr;
cArrayStream->release = ReleasePtr;
}

#if NET5_0_OR_GREATER
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })]
#endif
private unsafe static int GetSchema(CArrowArrayStream* cArrayStream, CArrowSchema* cSchema)
{
ExportedArrayStream arrayStream = null;
Expand All @@ -80,6 +99,9 @@ private unsafe static int GetSchema(CArrowArrayStream* cArrayStream, CArrowSchem
}
}

#if NET5_0_OR_GREATER
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })]
#endif
private unsafe static int GetNext(CArrowArrayStream* cArrayStream, CArrowArray* cArray)
{
ExportedArrayStream arrayStream = null;
Expand All @@ -100,6 +122,9 @@ private unsafe static int GetNext(CArrowArrayStream* cArrayStream, CArrowArray*
}
}

#if NET5_0_OR_GREATER
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })]
#endif
private unsafe static byte* GetLastError(CArrowArrayStream* cArrayStream)
{
try
Expand All @@ -113,10 +138,12 @@ private unsafe static int GetNext(CArrowArrayStream* cArrayStream, CArrowArray*
}
}

#if NET5_0_OR_GREATER
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })]
#endif
private unsafe static void Release(CArrowArrayStream* cArrayStream)
{
ExportedArrayStream arrayStream = ExportedArrayStream.FromPointer(cArrayStream->private_data);
arrayStream.Dispose();
ExportedArrayStream.Free(&cArrayStream->private_data);
cArrayStream->release = null;
}

Expand All @@ -140,6 +167,18 @@ sealed unsafe class ExportedArrayStream : IDisposable
return (void*)GCHandle.ToIntPtr(gch);
}

/// <summary>
/// Disposes the <see cref="ExportedArrayStream"/> referenced by the GC handle stored in
/// <c>*ptr</c>, frees that handle and clears the slot. Calling it again on an already
/// cleared slot is a no-op.
/// </summary>
/// <param name="ptr">
/// Address of the slot that holds the handle value (for example the address of a
/// stream's <c>private_data</c> field).
/// </param>
public static void Free(void** ptr)
{
    // An empty slot has to be detected before the conversion: GCHandle.FromIntPtr throws
    // on a zero value, and a handle it does return always reports IsAllocated == true,
    // so checking IsAllocated afterwards can never catch this case.
    if (ptr == null || *ptr == null)
    {
        return;
    }

    // The handle is the value stored in the slot, not the address of the slot itself.
    GCHandle gch = GCHandle.FromIntPtr((IntPtr)(*ptr));
    try
    {
        ((ExportedArrayStream)gch.Target).Dispose();
    }
    finally
    {
        // Free the handle and clear the slot even when Dispose throws, so the handle
        // is not leaked and a later call sees an empty slot.
        gch.Free();
        *ptr = null;
    }
}

public static ExportedArrayStream FromPointer(void* ptr)
{
GCHandle gch = GCHandle.FromIntPtr((IntPtr)ptr);
Expand Down Expand Up @@ -170,7 +209,7 @@ void ReleaseLastError()
{
if (LastError != null)
{
Marshal.FreeCoTaskMem((IntPtr)LastError);
Marshal.FreeHGlobal((IntPtr)LastError);
LastError = null;
}
}
Expand Down
19 changes: 17 additions & 2 deletions csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ private sealed unsafe class ImportedArrowArrayStream : IArrowArrayStream
private readonly Schema _schema;
private bool _disposed;

/// <summary>
/// Builds the message for a failed array stream call by invoking the stream's
/// <c>get_last_error</c> callback.
/// </summary>
/// <param name="arrayStream">Stream whose <c>get_last_error</c> callback is invoked.</param>
/// <param name="errno">Error code of the failed call; only used in the fallback text.</param>
/// <returns>
/// The callback's message converted with <c>StringUtil.PtrToStringUtf8</c>, or a generic
/// message containing <paramref name="errno"/> when the callback returns null.
/// </returns>
internal static string GetLastError(CArrowArrayStream* arrayStream, int errno)
{
    byte* message = arrayStream->get_last_error(arrayStream);
    return message != null
        ? StringUtil.PtrToStringUtf8(message)
        : $"Array stream operation failed with no message. Error code: {errno}";
}

public ImportedArrowArrayStream(CArrowArrayStream* cArrayStream)
{
if (cArrayStream == null)
Expand All @@ -71,7 +81,7 @@ public ImportedArrowArrayStream(CArrowArrayStream* cArrayStream)
int errno = _cArrayStream->get_schema(_cArrayStream, cSchema);
if (errno != 0)
{
throw new Exception($"Unexpected error recieved from external stream. Errno: {errno}");
throw new Exception(GetLastError(cArrayStream, errno));
}
_schema = CArrowSchemaImporter.ImportSchema(cSchema);
}
Expand All @@ -98,14 +108,19 @@ public ValueTask<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancell
throw new ObjectDisposedException(typeof(ImportedArrowArrayStream).Name);
}

if (cancellationToken.IsCancellationRequested)
{
return new(Task.FromCanceled<RecordBatch>(cancellationToken));
}

RecordBatch result = null;
CArrowArray* cArray = CArrowArray.Create();
try
{
int errno = _cArrayStream->get_next(_cArrayStream, cArray);
if (errno != 0)
{
throw new Exception($"Unexpected error recieved from external stream. Errno: {errno}");
return new(Task.FromException<RecordBatch>(new Exception(GetLastError(_cArrayStream, errno))));
}
if (cArray->release != null)
{
Expand Down
10 changes: 1 addition & 9 deletions csharp/src/Apache.Arrow/C/CArrowSchema.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,7 @@ public unsafe struct CArrowSchema
{
var ptr = (CArrowSchema*)Marshal.AllocHGlobal(sizeof(CArrowSchema));

ptr->format = null;
ptr->name = null;
ptr->metadata = null;
ptr->flags = 0;
ptr->n_children = 0;
ptr->children = null;
ptr->dictionary = null;
ptr->release = null;
ptr->private_data = null;
*ptr = default;

return ptr;
}
Expand Down
11 changes: 10 additions & 1 deletion csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,21 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Apache.Arrow.Types;

namespace Apache.Arrow.C
{
public static class CArrowSchemaExporter
{
#if NET5_0_OR_GREATER
private static unsafe delegate* unmanaged[Stdcall]<CArrowSchema*, void> ReleaseSchemaPtr => &ReleaseCArrowSchema;
#else
private unsafe delegate void ReleaseArrowSchema(CArrowSchema* cArray);
private static unsafe readonly NativeDelegate<ReleaseArrowSchema> s_releaseSchema = new NativeDelegate<ReleaseArrowSchema>(ReleaseCArrowSchema);
private static unsafe delegate* unmanaged[Stdcall]<CArrowSchema*, void> ReleaseSchemaPtr => (delegate* unmanaged[Stdcall]<CArrowSchema*, void>)s_releaseSchema.Pointer;
#endif

/// <summary>
/// Export a type to a <see cref="CArrowSchema"/>.
Expand Down Expand Up @@ -65,7 +71,7 @@ public static unsafe void ExportType(IArrowType datatype, CArrowSchema* schema)

schema->dictionary = ConstructDictionary(datatype);

schema->release = (delegate* unmanaged[Stdcall]<CArrowSchema*, void>)s_releaseSchema.Pointer;
schema->release = ReleaseSchemaPtr;

schema->private_data = null;
}
Expand Down Expand Up @@ -243,6 +249,9 @@ private static long GetFlags(IArrowType datatype, bool nullable = true)
}
}

#if NET5_0_OR_GREATER
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })]
#endif
private static unsafe void ReleaseCArrowSchema(CArrowSchema* schema)
{
if (schema == null) return;
Expand Down
7 changes: 3 additions & 4 deletions csharp/src/Apache.Arrow/C/NativeDelegate.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,16 @@

namespace Apache.Arrow.C
{
internal readonly struct NativeDelegate<T>
internal readonly struct NativeDelegate<T> where T : Delegate
{
private readonly T _managedDelegate; // For lifetime management
private readonly IntPtr _nativePointer;

public NativeDelegate(T managedDelegate)
{
_managedDelegate = managedDelegate;
_nativePointer = Marshal.GetFunctionPointerForDelegate<T>(managedDelegate);
Pointer = Marshal.GetFunctionPointerForDelegate(managedDelegate);
}

public IntPtr Pointer { get { return _nativePointer; } }
public IntPtr Pointer { get; }
}
}