diff --git a/csharp/src/Apache.Arrow/Arrays/Array.cs b/csharp/src/Apache.Arrow/Arrays/Array.cs
index 0838134b19c6d..4abe63e05ad83 100644
--- a/csharp/src/Apache.Arrow/Arrays/Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Array.cs
@@ -31,7 +31,7 @@ protected Array(ArrayData data)
public int Offset => Data.Offset;
- public int NullCount => Data.NullCount;
+ public int NullCount => Data.GetNullCount();
public ArrowBuffer NullBitmapBuffer => Data.Buffers[0];
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayData.cs b/csharp/src/Apache.Arrow/Arrays/ArrayData.cs
index 55d77f598c4e4..cdb6ed6b39418 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrayData.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayData.cs
@@ -15,7 +15,6 @@
using Apache.Arrow.Memory;
using Apache.Arrow.Types;
-using Google.FlatBuffers;
using System;
using System.Collections.Generic;
using System.Linq;
@@ -28,12 +27,30 @@ public sealed class ArrayData : IDisposable
public readonly IArrowType DataType;
public readonly int Length;
- public readonly int NullCount;
+
+ /// <summary>
+ /// The number of null values in the Array. May be -1 if the null count has not been computed.
+ /// </summary>
+ public int NullCount;
+
public readonly int Offset;
public readonly ArrowBuffer[] Buffers;
public readonly ArrayData[] Children;
public readonly ArrayData Dictionary; // Only used for dictionary type
+ /// <summary>
+ /// Returns the null count of the Array, computing and storing it on first use when it is not yet known.
+ /// </summary>
+ public int GetNullCount()
+ {
+     // Any value other than the RecalculateNullCount sentinel is returned as-is.
+     if (NullCount != RecalculateNullCount)
+     {
+         return NullCount;
+     }
+     return NullCount = ComputeNullCount();
+ }
+
// This is left for compatibility with lower version binaries
// before the dictionary type was supported.
public ArrayData(
@@ -111,7 +128,25 @@ public ArrayData Slice(int offset, int length)
length = Math.Min(Length - offset, length);
offset += Offset;
- return new ArrayData(DataType, length, RecalculateNullCount, offset, Buffers, Children, Dictionary);
+ int nullCount;
+ if (NullCount == 0)
+ {
+ nullCount = 0;
+ }
+ else if (NullCount == Length)
+ {
+ nullCount = length;
+ }
+ else if (offset == Offset && length == Length)
+ {
+ nullCount = NullCount;
+ }
+ else
+ {
+ nullCount = RecalculateNullCount;
+ }
+
+ return new ArrayData(DataType, length, nullCount, offset, Buffers, Children, Dictionary);
}
public ArrayData Clone(MemoryAllocator allocator = default)
@@ -125,5 +160,24 @@ public ArrayData Clone(MemoryAllocator allocator = default)
Children?.Select(b => b.Clone(allocator))?.ToArray(),
Dictionary?.Clone(allocator));
}
+
+ /// <summary>Counts the null values in this array's window of <c>Length</c> slots starting at <c>Offset</c>.</summary>
+ private int ComputeNullCount()
+ {
+     if (DataType.TypeId == ArrowTypeId.Union)
+     {
+         return UnionArray.ComputeNullCount(this);
+     }
+
+     // Without a validity bitmap only Null-typed arrays hold nulls (NullArray.IsNull is always true).
+     if (Buffers == null || Buffers.Length == 0 || Buffers[0].IsEmpty)
+     {
+         return DataType.TypeId == ArrowTypeId.Null ? Length : 0;
+     }
+
+     // Note: Dictionary arrays may be logically null if there is a null in the dictionary values, but this
+     // isn't accounted for by IArrowArray.IsNull, so we maintain consistency with that behaviour here.
+     return Length - BitUtility.CountBits(Buffers[0].Span, Offset, Length);
+ }
}
}
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
index 698d74e4bac84..84658a5fab812 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
@@ -71,7 +71,7 @@ public ArrayDataConcatenationVisitor(IReadOnlyList arrayDataList, Mem
foreach (ArrayData arrayData in _arrayDataList)
{
_totalLength += arrayData.Length;
- _totalNullCount += arrayData.NullCount;
+ _totalNullCount += arrayData.GetNullCount();
}
}
diff --git a/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
index b6b61c560e482..459a30e22115f 100644
--- a/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
@@ -53,5 +53,28 @@ protected override bool FieldIsValid(IArrowArray fieldArray, int index)
{
return fieldArray.IsValid(ValueOffsets[index]);
}
+
+ /// <summary>Counts the slots of a dense union whose referenced child value is null.</summary>
+ internal new static int ComputeNullCount(ArrayData data)
+ {
+     // Buffer 0 holds one type id per slot and buffer 1 one 32-bit value offset per slot.
+     var typeIds = data.Buffers[0].Span.Slice(data.Offset, data.Length);
+     var valueOffsets = data.Buffers[1].Span.CastTo<int>().Slice(data.Offset, data.Length);
+
+     var childArrays = new IArrowArray[data.Children.Length];
+     for (var childIdx = 0; childIdx < childArrays.Length; ++childIdx)
+     {
+         childArrays[childIdx] = ArrowArrayFactory.BuildArray(data.Children[childIdx]);
+     }
+
+     var nullCount = 0;
+     for (var i = 0; i < typeIds.Length; ++i)
+     {
+         // The slot's type id selects the child; its value offset selects the entry inside that child.
+         nullCount += childArrays[typeIds[i]].IsNull(valueOffsets[i]) ? 1 : 0;
+     }
+
+     return nullCount;
+ }
}
}
diff --git a/csharp/src/Apache.Arrow/Arrays/NullArray.cs b/csharp/src/Apache.Arrow/Arrays/NullArray.cs
index 762540065c929..7f3e183829243 100644
--- a/csharp/src/Apache.Arrow/Arrays/NullArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/NullArray.cs
@@ -95,7 +95,7 @@ public NullArray(int length)
public int Offset => Data.Offset;
- public int NullCount => Data.NullCount;
+ public int NullCount => Data.GetNullCount();
public void Dispose() { }
public bool IsNull(int index) => true;
diff --git a/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
index 07d36e25cfc23..ef55786f01a4a 100644
--- a/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
@@ -47,5 +47,26 @@ protected override bool FieldIsValid(IArrowArray fieldArray, int index)
{
return fieldArray.IsValid(index);
}
+
+ /// <summary>Counts the slots of a sparse union whose selected child value is null.</summary>
+ internal new static int ComputeNullCount(ArrayData data)
+ {
+     var start = data.Offset;
+     var selectors = data.Buffers[0].Span.Slice(start, data.Length);
+     var fieldArrays = new IArrowArray[data.Children.Length];
+     for (var f = 0; f < fieldArrays.Length; ++f)
+     {
+         fieldArrays[f] = ArrowArrayFactory.BuildArray(data.Children[f]);
+     }
+
+     var nulls = 0;
+     for (var slot = 0; slot < selectors.Length; ++slot)
+     {
+         // The selected child is probed at the union's own offset plus the slot position.
+         nulls += fieldArrays[selectors[slot]].IsNull(start + slot) ? 1 : 0;
+     }
+
+     return nulls;
+ }
}
}
diff --git a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
index 5fcb276655162..f96f527135351 100644
--- a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
@@ -41,7 +41,7 @@ public abstract class UnionArray : IArrowArray
public int Offset => Data.Offset;
- public int NullCount => Data.NullCount;
+ public int NullCount => Data.GetNullCount();
public bool IsValid(int index) => NullCount == 0 || FieldIsValid(Fields[TypeIds[index]], index);
@@ -91,6 +91,16 @@ protected static void ValidateMode(UnionMode expected, UnionMode actual)
}
}
+ /// <summary>Computes a union's null count with the routine matching its union mode.</summary>
+ /// <exception cref="InvalidOperationException">The union mode is neither sparse nor dense.</exception>
+ internal static int ComputeNullCount(ArrayData data) =>
+     ((UnionType)data.DataType).Mode switch
+     {
+         UnionMode.Dense => DenseUnionArray.ComputeNullCount(data),
+         UnionMode.Sparse => SparseUnionArray.ComputeNullCount(data),
+         _ => throw new InvalidOperationException("unknown union mode in null count computation")
+     };
+
private IReadOnlyList InitializeFields()
{
IArrowArray[] result = new IArrowArray[Data.Children.Length];
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
index 03059eaf5d4df..b241fdfea3bda 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
@@ -115,7 +115,7 @@ private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, Arr
{
cArray->length = array.Length;
cArray->offset = array.Offset;
- cArray->null_count = array.NullCount;
+ cArray->null_count = array.NullCount; // The C Data interface allows the null count to be -1
cArray->release = ReleaseArrayPtr;
cArray->private_data = MakePrivateData(sharedOwner);
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index b002f8c8b1578..7b319b03d790c 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -376,7 +376,7 @@ private void CreateSelfAndChildrenFieldNodes(ArrayData data)
CreateSelfAndChildrenFieldNodes(data.Children[i]);
}
}
- Flatbuf.FieldNode.CreateFieldNode(Builder, data.Length, data.NullCount);
+ Flatbuf.FieldNode.CreateFieldNode(Builder, data.Length, data.GetNullCount());
}
private static int CountAllNodes(IReadOnlyList fields)
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs
index a0e90cbbc7c61..682ebec323dc0 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs
@@ -185,6 +185,7 @@ public void SlicePrimitiveArrayWithNulls()
TestSlice(x => x.Append(new DateTime(2019, 1, 1)).Append(new DateTime(2019, 1, 2)).AppendNull().Append(new DateTime(2019, 1, 3)));
TestSlice(x => x.Append(10).Append(20).AppendNull().Append(30));
TestSlice(x => x.Append(10).Append(20).AppendNull().Append(30));
+ TestSlice(x => x.AppendNull().AppendNull().AppendNull()); // All nulls
static void TestNumberSlice()
where T : struct, INumber
@@ -314,6 +315,8 @@ private void ValidateArrays(PrimitiveArray slicedArray)
.SequenceEqual(slicedArray.Values));
Assert.Equal(baseArray.GetValue(slicedArray.Offset), slicedArray.GetValue(0));
+
+ ValidateNullCount(slicedArray);
}
private void ValidateArrays(BooleanArray slicedArray)
@@ -333,6 +336,8 @@ private void ValidateArrays(BooleanArray slicedArray)
#pragma warning disable CS0618
Assert.Equal(baseArray.GetBoolean(slicedArray.Offset), slicedArray.GetBoolean(0));
#pragma warning restore CS0618
+
+ ValidateNullCount(slicedArray);
}
private void ValidateArrays(BinaryArray slicedArray)
@@ -347,6 +352,16 @@ private void ValidateArrays(BinaryArray slicedArray)
.SequenceEqual(slicedArray.ValueOffsets));
Assert.True(baseArray.GetBytes(slicedArray.Offset).SequenceEqual(slicedArray.GetBytes(0)));
+
+ ValidateNullCount(slicedArray);
+ }
+
+ private static void ValidateNullCount(IArrowArray slicedArray)
+ {
+     // Tally nulls slot by slot so the reported NullCount is checked against an independent scan.
+     var nullsSeen = Enumerable.Range(0, slicedArray.Length).Count(i => slicedArray.IsNull(i));
+
+     Assert.Equal(nullsSeen, slicedArray.NullCount);
+ }
}
}
diff --git a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
index 1fb5cf2415c68..45fed722a745c 100644
--- a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs
@@ -25,6 +25,46 @@ public class UnionArrayTests
[InlineData(UnionMode.Sparse)]
[InlineData(UnionMode.Dense)]
public void UnionArray_IsNull(UnionMode mode)
+ {
+ var (array, expectedNull) = BuildUnionArray(mode, 100);
+
+ for (var i = 0; i < array.Length; ++i)
+ {
+ Assert.Equal(expectedNull[i], array.IsNull(i));
+ Assert.Equal(!expectedNull[i], array.IsValid(i));
+ }
+ }
+
+ [Theory]
+ [InlineData(UnionMode.Sparse)]
+ [InlineData(UnionMode.Dense)]
+ public void UnionArray_Slice(UnionMode mode)
+ {
+     var (array, expectedNull) = BuildUnionArray(mode, 10);
+
+     for (var offset = 0; offset < array.Length; ++offset)
+     {
+         // Inclusive bound: also cover slices that run to the end of the array, and the full-array slice.
+         for (var length = 0; length <= array.Length - offset; ++length)
+         {
+             var slicedArray = ArrowArrayFactory.Slice(array, offset, length);
+
+             var nullCount = 0;
+             for (var i = 0; i < slicedArray.Length; ++i)
+             {
+                 // TODO: Shouldn't need to add offset in IsNull/IsValid calls,
+                 // see https://github.com/apache/arrow/issues/41140
+                 Assert.Equal(expectedNull[offset + i], slicedArray.IsNull(offset + i));
+                 Assert.Equal(!expectedNull[offset + i], slicedArray.IsValid(offset + i));
+                 nullCount += expectedNull[offset + i] ? 1 : 0;
+             }
+
+             Assert.True(nullCount == slicedArray.NullCount, $"offset = {offset}, length = {length}");
+         }
+     }
+ }
+
+ private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode mode, int length)
{
var fields = new Field[]
{
@@ -34,7 +74,6 @@ public void UnionArray_IsNull(UnionMode mode)
var typeIds = fields.Select(f => (int) f.DataType.TypeId).ToArray();
var type = new UnionType(fields, typeIds, mode);
- const int length = 100;
var nullCount = 0;
var field0Builder = new Int32Array.Builder();
var field1Builder = new FloatArray.Builder();
@@ -44,7 +83,7 @@ public void UnionArray_IsNull(UnionMode mode)
for (var i = 0; i < length; ++i)
{
- var isNull = i % 5 == 0;
+ var isNull = i % 3 == 0;
expectedNull[i] = isNull;
nullCount += isNull ? 1 : 0;
@@ -104,10 +143,6 @@ public void UnionArray_IsNull(UnionMode mode)
? new DenseUnionArray(type, length, children, typeIdsBuffer, valuesOffsetBuffer, nullCount)
: new SparseUnionArray(type, length, children, typeIdsBuffer, nullCount);
- for (var i = 0; i < length; ++i)
- {
- Assert.Equal(expectedNull[i], array.IsNull(i));
- Assert.Equal(!expectedNull[i], array.IsValid(i));
- }
+ return (array, expectedNull);
}
}