Skip to content

Commit

Permalink
GH-43168: [Swift] Add buffer and array builders for Struct type (#43171)
Browse files Browse the repository at this point in the history
### Rationale for this change
The struct type requires additions and modifications to the buffer and array builders. These are needed in order to match the functionality available for the other supported types.

### What changes are included in this PR?
Updates to buffer and array builders for Struct type.

### Are these changes tested?
Yes, an array test has been added for the Struct type using the Struct builders.

**This PR includes breaking changes to public APIs.**
The init method for ArrowArray currently does not throw.  This PR will change this behavior and allow the init method to throw.  External code that calls init will need to be updated to catch any errors. 

* GitHub Issue: #43168

Authored-by: Alva Bandy <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
abandy authored Jul 18, 2024
1 parent b382156 commit 0bae073
Show file tree
Hide file tree
Showing 5 changed files with 269 additions and 44 deletions.
46 changes: 21 additions & 25 deletions swift/Arrow/Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -78,41 +78,41 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder {
_ arrowType: ArrowType, with: ArrowData) throws -> ArrowArrayHolder {
switch arrowType.id {
case .int8:
return ArrowArrayHolderImpl(FixedArray<Int8>(with))
return try ArrowArrayHolderImpl(FixedArray<Int8>(with))
case .int16:
return ArrowArrayHolderImpl(FixedArray<Int16>(with))
return try ArrowArrayHolderImpl(FixedArray<Int16>(with))
case .int32:
return ArrowArrayHolderImpl(FixedArray<Int32>(with))
return try ArrowArrayHolderImpl(FixedArray<Int32>(with))
case .int64:
return ArrowArrayHolderImpl(FixedArray<Int64>(with))
return try ArrowArrayHolderImpl(FixedArray<Int64>(with))
case .uint8:
return ArrowArrayHolderImpl(FixedArray<UInt8>(with))
return try ArrowArrayHolderImpl(FixedArray<UInt8>(with))
case .uint16:
return ArrowArrayHolderImpl(FixedArray<UInt16>(with))
return try ArrowArrayHolderImpl(FixedArray<UInt16>(with))
case .uint32:
return ArrowArrayHolderImpl(FixedArray<UInt32>(with))
return try ArrowArrayHolderImpl(FixedArray<UInt32>(with))
case .uint64:
return ArrowArrayHolderImpl(FixedArray<UInt64>(with))
return try ArrowArrayHolderImpl(FixedArray<UInt64>(with))
case .double:
return ArrowArrayHolderImpl(FixedArray<Double>(with))
return try ArrowArrayHolderImpl(FixedArray<Double>(with))
case .float:
return ArrowArrayHolderImpl(FixedArray<Float>(with))
return try ArrowArrayHolderImpl(FixedArray<Float>(with))
case .date32:
return ArrowArrayHolderImpl(Date32Array(with))
return try ArrowArrayHolderImpl(Date32Array(with))
case .date64:
return ArrowArrayHolderImpl(Date64Array(with))
return try ArrowArrayHolderImpl(Date64Array(with))
case .time32:
return ArrowArrayHolderImpl(Time32Array(with))
return try ArrowArrayHolderImpl(Time32Array(with))
case .time64:
return ArrowArrayHolderImpl(Time64Array(with))
return try ArrowArrayHolderImpl(Time64Array(with))
case .string:
return ArrowArrayHolderImpl(StringArray(with))
return try ArrowArrayHolderImpl(StringArray(with))
case .boolean:
return ArrowArrayHolderImpl(BoolArray(with))
return try ArrowArrayHolderImpl(BoolArray(with))
case .binary:
return ArrowArrayHolderImpl(BinaryArray(with))
return try ArrowArrayHolderImpl(BinaryArray(with))
case .strct:
return ArrowArrayHolderImpl(StructArray(with))
return try ArrowArrayHolderImpl(StructArray(with))
default:
throw ArrowError.invalid("Array not found for type: \(arrowType)")
}
Expand All @@ -125,7 +125,7 @@ public class ArrowArray<T>: AsString, AnyArray {
public var nullCount: UInt {return self.arrowData.nullCount}
public var length: UInt {return self.arrowData.length}

public required init(_ arrowData: ArrowData) {
public required init(_ arrowData: ArrowData) throws {
self.arrowData = arrowData
}

Expand Down Expand Up @@ -277,18 +277,14 @@ public class BinaryArray: ArrowArray<Data> {

public class StructArray: ArrowArray<[Any?]> {
public private(set) var arrowFields: [ArrowArrayHolder]?
public required init(_ arrowData: ArrowData) {
super.init(arrowData)
}

public func initialize() throws -> StructArray {
public required init(_ arrowData: ArrowData) throws {
try super.init(arrowData)
var fields = [ArrowArrayHolder]()
for child in arrowData.children {
fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
}

self.arrowFields = fields
return self
}

public override subscript(_ index: UInt) -> [Any?]? {
Expand Down
118 changes: 117 additions & 1 deletion swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ public class ArrowArrayBuilder<T: ArrowBufferBuilder, U: ArrowArray<T.ItemType>>
public func finish() throws -> ArrowArray<T.ItemType> {
let buffers = self.bufferBuilder.finish()
let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount)
return U(arrowData)
let array = try U(arrowData)
return array
}

public func getStride() -> Int {
Expand Down Expand Up @@ -118,6 +119,55 @@ public class Time64ArrayBuilder: ArrowArrayBuilder<FixedBufferBuilder<Time64>, T
}
}

/// Builds a `StructArray` by recording row-level validity in a `StructBufferBuilder`
/// and forwarding each column value of an appended row to a per-field child builder.
public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructArray> {
    /// Child builders, one per struct field, in field order.
    let builders: [any ArrowArrayHolderBuilder]
    /// Field definitions describing the struct's columns.
    let fields: [ArrowField]

    /// Creates a struct builder from explicit field definitions and matching child builders.
    ///
    /// - Parameters:
    ///   - fields: The struct's fields, in column order.
    ///   - builders: One child builder per field, in the same order as `fields`.
    /// - Throws: `ArrowError.invalid` when `builders` and `fields` differ in count, or any
    ///   error thrown by the superclass initializer.
    public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws {
        // A struct with N declared fields must carry exactly N children; reject the
        // mismatch here instead of producing an inconsistent array in finish().
        guard fields.count == builders.count else {
            throw ArrowError.invalid(
                "Struct builder requires one builder per field: "
                + "\(fields.count) fields, \(builders.count) builders")
        }

        self.fields = fields
        self.builders = builders
        try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
        self.bufferBuilder.initializeTypeInfo(fields)
    }

    /// Creates a struct builder, deriving a child builder for each field from its arrow type.
    ///
    /// - Parameter fields: The struct's fields, in column order.
    /// - Throws: Any error from `ArrowArrayBuilders.loadBuilder(arrowType:)` or from the
    ///   superclass initializer.
    public init(_ fields: [ArrowField]) throws {
        self.fields = fields
        var builders = [any ArrowArrayHolderBuilder]()
        for field in fields {
            builders.append(try ArrowArrayBuilders.loadBuilder(arrowType: field.type))
        }

        self.builders = builders
        try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
        // Keep this initializer in agreement with init(_:builders:), which also
        // records the field info on the buffer builder.
        self.bufferBuilder.initializeTypeInfo(fields)
    }

    /// Appends one struct row.
    ///
    /// - Parameter values: The row's column values in field order, or `nil` for a null row.
    ///   A null row appends `nil` to every child builder so the children stay aligned
    ///   with the parent.
    /// - Precondition: A non-nil row supplies at least one value per child builder.
    public override func append(_ values: [Any?]?) {
        if let anyValues = values {
            // Fail with a descriptive message before any buffer is touched, rather than
            // trapping on an out-of-range subscript after a partial append.
            precondition(
                anyValues.count >= builders.count,
                "Struct row has \(anyValues.count) values but \(builders.count) fields are required")
        }

        self.bufferBuilder.append(values)
        if let anyValues = values {
            // zip stops at the shorter sequence, i.e. builders.count (checked above).
            for (builder, value) in zip(builders, anyValues) {
                builder.appendAny(value)
            }
        } else {
            for builder in builders {
                builder.appendAny(nil)
            }
        }
    }

    /// Finishes the child builders and assembles the resulting `StructArray`.
    ///
    /// - Returns: A struct array whose children are the finished child arrays.
    /// - Throws: Any error raised while finishing a child builder, creating the
    ///   `ArrowData`, or constructing the `StructArray`.
    public override func finish() throws -> StructArray {
        let buffers = self.bufferBuilder.finish()
        let childData: [ArrowData] = try builders.map { try $0.toHolder().array.arrowData }
        let arrowData = try ArrowData(self.type, buffers: buffers,
                                      children: childData, nullCount: self.nullCount,
                                      length: self.length)
        return try StructArray(arrowData)
    }
}

public class ArrowArrayBuilders {
public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
_ builderType: Any.Type) throws -> ArrowArrayHolderBuilder {
Expand Down Expand Up @@ -168,6 +218,72 @@ public class ArrowArrayBuilders {
type == Float.self || type == Date.self
}

/// Creates a `StructArrayBuilder` whose fields mirror the labeled children of `obj`.
///
/// Each labeled child reported by `Mirror` becomes a nullable field named after the
/// label, typed from the runtime type of the child's value. Children without a label
/// are skipped.
///
/// - Parameter obj: The value to reflect over.
/// - Returns: A struct builder with one field and one child builder per labeled child.
/// - Throws: Any error from `loadBuilder(arrowType:)` or from the `StructArrayBuilder`
///   initializer.
public static func loadStructArrayBuilderForType<T>(_ obj: T) throws -> StructArrayBuilder {
    var fields = [ArrowField]()
    var builders = [ArrowArrayHolderBuilder]()
    for child in Mirror(reflecting: obj).children {
        guard let label = child.label else {
            continue
        }

        // child.value is typed as Any, so type(of:) yields its dynamic type.
        let arrowType = ArrowType(ArrowType.infoForType(type(of: child.value)))
        fields.append(ArrowField(label, type: arrowType, isNullable: true))
        builders.append(try loadBuilder(arrowType: arrowType))
    }

    return try StructArrayBuilder(fields, builders: builders)
}

/// Returns a new, empty array builder matching the given arrow type.
///
/// - Parameter arrowType: The arrow type to build values for. For `.time32` and
///   `.time64` it must be an `ArrowTypeTime32` / `ArrowTypeTime64` so the time unit
///   can be read from it.
/// - Returns: A builder for the requested type.
/// - Throws: `ArrowError.invalid` when a time type id is not backed by the expected
///   time type class, `ArrowError.unknownType` when no builder exists for the type id,
///   or any error thrown while creating the builder.
public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
    arrowType: ArrowType) throws -> ArrowArrayHolderBuilder {
    switch arrowType.id {
    // Signed integers
    case .int8:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Int8>
    case .int16:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Int16>
    case .int32:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Int32>
    case .int64:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Int64>
    // Unsigned integers
    case .uint8:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt8>
    case .uint16:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt16>
    case .uint32:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt32>
    case .uint64:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt64>
    // Floating point
    case .float:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Float>
    case .double:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Double>
    // Boolean, text and bytes
    case .boolean:
        return try BoolArrayBuilder()
    case .string:
        return try StringArrayBuilder()
    case .binary:
        return try BinaryArrayBuilder()
    // Dates
    case .date32:
        return try Date32ArrayBuilder()
    case .date64:
        return try Date64ArrayBuilder()
    // Times: the unit lives on the concrete time type, so a downcast is required.
    case .time32:
        if let time32Type = arrowType as? ArrowTypeTime32 {
            return try Time32ArrayBuilder(time32Type.unit)
        }
        throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
    case .time64:
        if let time64Type = arrowType as? ArrowTypeTime64 {
            return try Time64ArrayBuilder(time64Type.unit)
        }
        throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
    default:
        throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)")
    }
}

public static func loadNumberArrayBuilder<T>() throws -> NumberArrayBuilder<T> {
let type = T.self
if type == Int8.self {
Expand Down
72 changes: 62 additions & 10 deletions swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,14 @@ public protocol ArrowBufferBuilder {
func finish() -> [ArrowBuffer]
}

public class BaseBufferBuilder<T> {
var values: ArrowBuffer
public class BaseBufferBuilder {
var nulls: ArrowBuffer
var stride: Int
public var offset: UInt = 0
public var capacity: UInt {return self.values.capacity}
public var capacity: UInt {return self.nulls.capacity}
public var length: UInt = 0
public var nullCount: UInt = 0

init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout<T>.stride) {
self.stride = stride
self.values = values
init(_ nulls: ArrowBuffer) {
self.nulls = nulls
}

Expand All @@ -61,7 +57,19 @@ public class BaseBufferBuilder<T> {
}
}

public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
/// A buffer builder that owns a values buffer in addition to the null buffer held by
/// `BaseBufferBuilder`, together with the stride used for its elements.
public class ValuesBufferBuilder<T>: BaseBufferBuilder {
    /// Buffer holding the appended values.
    var values: ArrowBuffer
    /// Stride of one element; defaults to `MemoryLayout<T>.stride`.
    var stride: Int
    /// Capacity is reported from the values buffer rather than the null buffer.
    public override var capacity: UInt { values.capacity }

    /// - Parameters:
    ///   - values: Initial values buffer.
    ///   - nulls: Initial null buffer, handed to the base class.
    ///   - stride: Element stride; defaults to `MemoryLayout<T>.stride`.
    init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout<T>.stride) {
        self.values = values
        self.stride = stride
        super.init(nulls)
    }
}

public class FixedBufferBuilder<T>: ValuesBufferBuilder<T>, ArrowBufferBuilder {
public typealias ItemType = T
private let defaultVal: ItemType
public required init() throws {
Expand Down Expand Up @@ -138,7 +146,7 @@ public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
}
}

public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
public class BoolBufferBuilder: ValuesBufferBuilder<Bool>, ArrowBufferBuilder {
public typealias ItemType = Bool
public required init() throws {
let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
Expand Down Expand Up @@ -190,7 +198,7 @@ public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
}
}

public class VariableBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
public class VariableBufferBuilder<T>: ValuesBufferBuilder<T>, ArrowBufferBuilder {
public typealias ItemType = T
var offsets: ArrowBuffer
let binaryStride = MemoryLayout<UInt8>.stride
Expand Down Expand Up @@ -327,3 +335,47 @@ public class Date64BufferBuilder: AbstractWrapperBufferBuilder<Date, Int64> {
}
}
}

/// Buffer builder for struct rows. It only maintains the null buffer: every appended
/// row sets (non-nil row) or clears (nil row) one bit; the row's column values are not
/// stored here.
public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
    public typealias ItemType = [Any?]
    /// Struct type description; stays `nil` until `initializeTypeInfo(_:)` is called.
    var info: ArrowNestedType?

    /// Creates a builder whose null buffer is created with a requested length of zero.
    public init() throws {
        super.init(ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride)))
    }

    /// Records the struct's field layout on this builder.
    ///
    /// - Parameter fields: The struct's fields.
    public func initializeTypeInfo(_ fields: [ArrowField]) {
        self.info = ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
    }

    /// Appends the validity of one struct row.
    ///
    /// - Parameter newValue: The row; only its nil-ness is recorded. A nil row also
    ///   increments `nullCount`.
    public func append(_ newValue: [Any?]?) {
        let slot = length
        length += 1
        // NOTE(review): `length` counts rows, while `nulls.length` comes from a buffer
        // created with `resizeLength/8 + 1` — confirm both sides use the same unit.
        if length > nulls.length {
            resize(length)
        }

        let bit = slot + offset
        guard newValue != nil else {
            nullCount += 1
            BitUtility.clearBit(bit, buffer: nulls)
            return
        }

        BitUtility.setBit(bit, buffer: nulls)
    }

    /// Replaces the null buffer with a larger one when `length` exceeds its current length.
    ///
    /// - Parameter length: The length that must be accommodated.
    public func resize(_ length: UInt) {
        guard length > nulls.length else {
            return
        }

        let grownLength = resizeLength(nulls)
        var grown = ArrowBuffer.createBuffer(grownLength/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
        // Carry the existing bits over into the new buffer.
        ArrowBuffer.copyCurrent(nulls, to: &grown, len: nulls.capacity)
        nulls = grown
    }

    /// Produces the final buffers for the struct array.
    ///
    /// - Returns: A single-element array containing a null buffer created for the
    ///   current row count.
    public func finish() -> [ArrowBuffer] {
        var packed = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
        ArrowBuffer.copyCurrent(nulls, to: &packed, len: packed.capacity)
        return [packed]
    }
}
Loading

0 comments on commit 0bae073

Please sign in to comment.