-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-35809: [C#] Improvements to the C Data Interface #35810
Changes from 11 commits
71bb92a
5a7c883
f879fa8
ed68f43
b0bbeaa
1b47731
a595a9a
f9f310a
540c190
473a9be
154b86c
e9365fc
e10cad8
d7d830f
09e6a24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -15,16 +15,21 @@ | |||||
|
||||||
|
||||||
using System; | ||||||
using System.Runtime.CompilerServices; | ||||||
using System.Runtime.InteropServices; | ||||||
using Apache.Arrow.Memory; | ||||||
|
||||||
namespace Apache.Arrow.C | ||||||
{ | ||||||
public static class CArrowArrayExporter | ||||||
{ | ||||||
#if NET5_0_OR_GREATER | ||||||
private static unsafe delegate* unmanaged[Stdcall]<CArrowArray*, void> ReleaseArrayPtr => &ReleaseArray; | ||||||
#else | ||||||
private unsafe delegate void ReleaseArrowArray(CArrowArray* cArray); | ||||||
private static unsafe readonly NativeDelegate<ReleaseArrowArray> s_releaseArray = new NativeDelegate<ReleaseArrowArray>(ReleaseArray); | ||||||
|
||||||
private static unsafe delegate* unmanaged[Stdcall]<CArrowArray*, void> ReleaseArrayPtr => (delegate* unmanaged[Stdcall]<CArrowArray*, void>)s_releaseArray.Pointer; | ||||||
#endif | ||||||
/// <summary> | ||||||
/// Export an <see cref="IArrowArray"/> to a <see cref="CArrowArray"/>. Whether or not the | ||||||
/// export succeeds, the original array becomes invalid. Clone an array to continue using it | ||||||
|
@@ -58,7 +63,7 @@ public static unsafe void ExportArray(IArrowArray array, CArrowArray* cArray) | |||||
try | ||||||
{ | ||||||
ConvertArray(allocationOwner, array.Data, cArray); | ||||||
cArray->release = (delegate* unmanaged[Stdcall]<CArrowArray*, void>)(IntPtr)s_releaseArray.Pointer; | ||||||
cArray->release = ReleaseArrayPtr; | ||||||
cArray->private_data = FromDisposable(allocationOwner); | ||||||
allocationOwner = null; | ||||||
} | ||||||
|
@@ -101,7 +106,7 @@ public static unsafe void ExportRecordBatch(RecordBatch batch, CArrowArray* cArr | |||||
try | ||||||
{ | ||||||
ConvertRecordBatch(allocationOwner, batch, cArray); | ||||||
cArray->release = (delegate* unmanaged[Stdcall]<CArrowArray*, void>)s_releaseArray.Pointer; | ||||||
cArray->release = ReleaseArrayPtr; | ||||||
cArray->private_data = FromDisposable(allocationOwner); | ||||||
allocationOwner = null; | ||||||
} | ||||||
|
@@ -116,7 +121,7 @@ private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, Arr | |||||
cArray->length = array.Length; | ||||||
cArray->offset = array.Offset; | ||||||
cArray->null_count = array.NullCount; | ||||||
cArray->release = (delegate* unmanaged[Stdcall]<CArrowArray*, void>)s_releaseArray.Pointer; | ||||||
cArray->release = ReleaseArrayPtr; | ||||||
cArray->private_data = null; | ||||||
|
||||||
cArray->n_buffers = array.Buffers?.Length ?? 0; | ||||||
|
@@ -161,7 +166,7 @@ private unsafe static void ConvertRecordBatch(ExportedAllocationOwner sharedOwne | |||||
cArray->length = batch.Length; | ||||||
cArray->offset = 0; | ||||||
cArray->null_count = 0; | ||||||
cArray->release = (delegate* unmanaged[Stdcall]<CArrowArray*, void>)s_releaseArray.Pointer; | ||||||
cArray->release = ReleaseArrayPtr; | ||||||
cArray->private_data = null; | ||||||
|
||||||
cArray->n_buffers = 1; | ||||||
|
@@ -184,13 +189,12 @@ private unsafe static void ConvertRecordBatch(ExportedAllocationOwner sharedOwne | |||||
cArray->dictionary = null; | ||||||
} | ||||||
|
||||||
#if NET5_0_OR_GREATER | ||||||
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't the default calling convention be used instead? I'm not sure There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See #34133 (comment). Ideally we would have used the default calling convention, but that would not be suppported on anything earlier than .NET 5. And in 64-bit platforms the calling convention doesn't matter either way. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, the entire point (mostly) of implementing the C Data Interface is to be compatible with non-.Net producers/consumers. Those are extremely likely to use the platform default. So we should get it right at least when possible, i.e. on .Net >= 5.0. As for https://stackoverflow.com/questions/34832679/is-the-callingconvention-ignored-in-64-bit-net-applications , does it apply here? It's talking about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In any case, keeping the default convention seems more theoretically sound (and forward-looking, perhaps). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cc @westonpace @lidavidm for opinions. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I understand correctly, are you saying to change this:
into this? #if NET5_0_OR_GREATER
public delegate* unmanaged<CArrowArray*, void> release;
#else
public delegate* unmanaged[Stdcall]<CArrowArray*, void> release;
#endif That would cause an incompatible API surface between the assembly compiled for .NET 6 and that compiled for the earlier frameworks. We have two options:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, why is this Arrow C# API users should only see the high-level import and export methods such as (an important thing to understand is that the C Data Interface is a binary interface, not an API) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you take a look for example at the Arrow C++ implementation, its release functions are entirely private. For example arrow/cpp/src/arrow/c/bridge.cc Line 107 in e628ca5
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I should make the members of these structs private? That also works. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there were some reason for the fields to stay public, this one could also probably be defined as a union between a public IntPtr and a private delegate. (I don't know why the fields would need to be public; I'm pretty sure I just followed the pattern that was present for schemas.) |
||||||
#endif | ||||||
private unsafe static void ReleaseArray(CArrowArray* cArray) | ||||||
{ | ||||||
if (cArray->private_data != null) | ||||||
{ | ||||||
Dispose(&cArray->private_data); | ||||||
} | ||||||
cArray->private_data = null; | ||||||
Dispose(&cArray->private_data); | ||||||
cArray->release = null; | ||||||
} | ||||||
|
||||||
|
@@ -203,6 +207,10 @@ private unsafe static void ReleaseArray(CArrowArray* cArray) | |||||
private unsafe static void Dispose(void** ptr) | ||||||
{ | ||||||
GCHandle gch = GCHandle.FromIntPtr((IntPtr)(*ptr)); | ||||||
if (!gch.IsAllocated) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is effectively a noop. If the pointer was null, the previous line will throw InvalidOperationException and if it wasn't null then IsAllocated will return true. The overall change here also means that calling ReleaseArray twice will now throw an exception instead of the second call being a no-op. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a comment explaining when this might occur? (perhaps if an exception occurs while exporting the array?) |
||||||
{ | ||||||
return; | ||||||
} | ||||||
((IDisposable)gch.Target).Dispose(); | ||||||
gch.Free(); | ||||||
*ptr = null; | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,21 +15,37 @@ | |
|
||
|
||
using System; | ||
using System.Runtime.CompilerServices; | ||
using System.Runtime.InteropServices; | ||
using Apache.Arrow.Ipc; | ||
|
||
namespace Apache.Arrow.C | ||
{ | ||
public static class CArrowArrayStreamExporter | ||
{ | ||
#if NET5_0_OR_GREATER | ||
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowSchema*, int> GetSchemaPtr => &GetSchema; | ||
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowArray*, int> GetNextPtr => &GetNext; | ||
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, byte*> GetLastErrorPtr => &GetLastError; | ||
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, void> ReleasePtr => &Release; | ||
#else | ||
private unsafe delegate int GetSchemaArrayStream(CArrowArrayStream* cArrayStream, CArrowSchema* cSchema); | ||
private static unsafe NativeDelegate<GetSchemaArrayStream> s_getSchemaArrayStream = new NativeDelegate<GetSchemaArrayStream>(GetSchema); | ||
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowSchema*, int> GetSchemaPtr => | ||
(delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowSchema*, int>)s_getSchemaArrayStream.Pointer; | ||
private unsafe delegate int GetNextArrayStream(CArrowArrayStream* cArrayStream, CArrowArray* cArray); | ||
private static unsafe NativeDelegate<GetNextArrayStream> s_getNextArrayStream = new NativeDelegate<GetNextArrayStream>(GetNext); | ||
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowArray*, int> GetNextPtr => | ||
(delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowArray*, int>)s_getNextArrayStream.Pointer; | ||
private unsafe delegate byte* GetLastErrorArrayStream(CArrowArrayStream* cArrayStream); | ||
private static unsafe NativeDelegate<GetLastErrorArrayStream> s_getLastErrorArrayStream = new NativeDelegate<GetLastErrorArrayStream>(GetLastError); | ||
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, byte*> GetLastErrorPtr => | ||
(delegate* unmanaged[Stdcall]<CArrowArrayStream*, byte*>)s_getLastErrorArrayStream.Pointer; | ||
private unsafe delegate void ReleaseArrayStream(CArrowArrayStream* cArrayStream); | ||
private static unsafe NativeDelegate<ReleaseArrayStream> s_releaseArrayStream = new NativeDelegate<ReleaseArrayStream>(Release); | ||
private static unsafe delegate* unmanaged[Stdcall]<CArrowArrayStream*, void> ReleasePtr => | ||
(delegate* unmanaged[Stdcall]<CArrowArrayStream*, void>)s_releaseArrayStream.Pointer; | ||
#endif | ||
|
||
/// <summary> | ||
/// Export an <see cref="IArrowArrayStream"/> to a <see cref="CArrowArrayStream"/>. | ||
|
@@ -59,12 +75,15 @@ public static unsafe void ExportArrayStream(IArrowArrayStream arrayStream, CArro | |
} | ||
|
||
cArrayStream->private_data = ExportedArrayStream.Export(arrayStream); | ||
cArrayStream->get_schema = (delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowSchema*, int>)s_getSchemaArrayStream.Pointer; | ||
cArrayStream->get_next = (delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowArray*, int>)s_getNextArrayStream.Pointer; | ||
cArrayStream->get_last_error = (delegate* unmanaged[Stdcall]<CArrowArrayStream*, byte*>)s_getLastErrorArrayStream.Pointer; | ||
cArrayStream->release = (delegate* unmanaged[Stdcall]<CArrowArrayStream*, void>)s_releaseArrayStream.Pointer; | ||
cArrayStream->get_schema = GetSchemaPtr; | ||
cArrayStream->get_next = GetNextPtr; | ||
cArrayStream->get_last_error = GetLastErrorPtr; | ||
cArrayStream->release = ReleasePtr; | ||
} | ||
|
||
#if NET5_0_OR_GREATER | ||
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })] | ||
#endif | ||
private unsafe static int GetSchema(CArrowArrayStream* cArrayStream, CArrowSchema* cSchema) | ||
{ | ||
ExportedArrayStream arrayStream = null; | ||
|
@@ -80,6 +99,9 @@ private unsafe static int GetSchema(CArrowArrayStream* cArrayStream, CArrowSchem | |
} | ||
} | ||
|
||
#if NET5_0_OR_GREATER | ||
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })] | ||
#endif | ||
private unsafe static int GetNext(CArrowArrayStream* cArrayStream, CArrowArray* cArray) | ||
{ | ||
ExportedArrayStream arrayStream = null; | ||
|
@@ -100,6 +122,9 @@ private unsafe static int GetNext(CArrowArrayStream* cArrayStream, CArrowArray* | |
} | ||
} | ||
|
||
#if NET5_0_OR_GREATER | ||
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })] | ||
#endif | ||
private unsafe static byte* GetLastError(CArrowArrayStream* cArrayStream) | ||
{ | ||
try | ||
|
@@ -113,10 +138,12 @@ private unsafe static int GetNext(CArrowArrayStream* cArrayStream, CArrowArray* | |
} | ||
} | ||
|
||
#if NET5_0_OR_GREATER | ||
[UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvStdcall) })] | ||
#endif | ||
private unsafe static void Release(CArrowArrayStream* cArrayStream) | ||
{ | ||
ExportedArrayStream arrayStream = ExportedArrayStream.FromPointer(cArrayStream->private_data); | ||
arrayStream.Dispose(); | ||
ExportedArrayStream.Free(&cArrayStream->private_data); | ||
cArrayStream->release = null; | ||
} | ||
|
||
|
@@ -140,6 +167,18 @@ sealed unsafe class ExportedArrayStream : IDisposable | |
return (void*)GCHandle.ToIntPtr(gch); | ||
} | ||
|
||
public static void Free(void** ptr) | ||
{ | ||
GCHandle gch = GCHandle.FromIntPtr((IntPtr)ptr); | ||
if (!gch.IsAllocated) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Same comment about IsAllocated.) |
||
{ | ||
return; | ||
} | ||
((ExportedArrayStream)gch.Target).Dispose(); | ||
gch.Free(); | ||
*ptr = null; | ||
} | ||
|
||
public static ExportedArrayStream FromPointer(void* ptr) | ||
{ | ||
GCHandle gch = GCHandle.FromIntPtr((IntPtr)ptr); | ||
|
@@ -170,7 +209,7 @@ void ReleaseLastError() | |
{ | ||
if (LastError != null) | ||
{ | ||
Marshal.FreeCoTaskMem((IntPtr)LastError); | ||
Marshal.FreeHGlobal((IntPtr)LastError); | ||
LastError = null; | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps add a comment explaining why this needs .Net 5.0+? Or will it be obvious to a C# developer?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
UnmanagedCallersOnlyAttribute
was introduced in .NET 5.