Skip to content

Commit

Permalink
GH-39720: [Swift] Switch reader to use arrow field instead of proto f…
Browse files Browse the repository at this point in the history
…or building arrays (#39721)

This PR updates ArrowReaderHelper to build arrays from an ArrowField object instead of a protobuf field object. This removes the protobuf dependency from array construction and makes the code easier to reuse (for example, for the C Data Interface).
* Closes: #39720

Authored-by: Alva Bandy <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
abandy authored Jan 27, 2024
1 parent 5d7f661 commit 21ffd82
Show file tree
Hide file tree
Showing 6 changed files with 221 additions and 104 deletions.
12 changes: 2 additions & 10 deletions swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ public class ArrowArrayBuilder<T: ArrowBufferBuilder, U: ArrowArray<T.ItemType>>

public func finish() throws -> ArrowArray<T.ItemType> {
let buffers = self.bufferBuilder.finish()
let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount, stride: self.getStride())
let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount)
return U(arrowData)
}

public func getStride() -> Int {
MemoryLayout<T.ItemType>.stride
return self.type.getStride()
}
}

Expand Down Expand Up @@ -73,20 +73,12 @@ public class Date32ArrayBuilder: ArrowArrayBuilder<Date32BufferBuilder, Date32Ar
fileprivate convenience init() throws {
try self.init(ArrowType(ArrowType.ArrowDate32))
}

public override func getStride() -> Int {
MemoryLayout<Int32>.stride
}
}

public class Date64ArrayBuilder: ArrowArrayBuilder<Date64BufferBuilder, Date64Array> {
fileprivate convenience init() throws {
try self.init(ArrowType(ArrowType.ArrowDate64))
}

public override func getStride() -> Int {
MemoryLayout<Int64>.stride
}
}

public class Time32ArrayBuilder: ArrowArrayBuilder<FixedBufferBuilder<Time32>, Time32Array> {
Expand Down
4 changes: 2 additions & 2 deletions swift/Arrow/Sources/Arrow/ArrowData.swift
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public class ArrowData {
public let length: UInt
public let stride: Int

init(_ arrowType: ArrowType, buffers: [ArrowBuffer], nullCount: UInt, stride: Int) throws {
init(_ arrowType: ArrowType, buffers: [ArrowBuffer], nullCount: UInt) throws {
let infoType = arrowType.info
switch infoType {
case let .primitiveInfo(typeId):
Expand All @@ -45,7 +45,7 @@ public class ArrowData {
self.buffers = buffers
self.nullCount = nullCount
self.length = buffers[1].length
self.stride = stride
self.stride = arrowType.getStride()
}

public func isNull(_ at: UInt) -> Bool {
Expand Down
159 changes: 67 additions & 92 deletions swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import Foundation
private func makeBinaryHolder(_ buffers: [ArrowBuffer],
nullCount: UInt) -> Result<ArrowArrayHolder, ArrowError> {
do {
let arrowData = try ArrowData(ArrowType(ArrowType.ArrowBinary), buffers: buffers,
nullCount: nullCount, stride: MemoryLayout<Int8>.stride)
let arrowType = ArrowType(ArrowType.ArrowBinary)
let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
return .success(ArrowArrayHolder(BinaryArray(arrowData)))
} catch let error as ArrowError {
return .failure(error)
Expand All @@ -34,8 +34,8 @@ private func makeBinaryHolder(_ buffers: [ArrowBuffer],
private func makeStringHolder(_ buffers: [ArrowBuffer],
nullCount: UInt) -> Result<ArrowArrayHolder, ArrowError> {
do {
let arrowData = try ArrowData(ArrowType(ArrowType.ArrowString), buffers: buffers,
nullCount: nullCount, stride: MemoryLayout<Int8>.stride)
let arrowType = ArrowType(ArrowType.ArrowString)
let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
return .success(ArrowArrayHolder(StringArray(arrowData)))
} catch let error as ArrowError {
return .failure(error)
Expand All @@ -44,33 +44,17 @@ private func makeStringHolder(_ buffers: [ArrowBuffer],
}
}

private func makeFloatHolder(_ floatType: org_apache_arrow_flatbuf_FloatingPoint,
buffers: [ArrowBuffer],
nullCount: UInt
) -> Result<ArrowArrayHolder, ArrowError> {
switch floatType.precision {
case .single:
return makeFixedHolder(Float.self, buffers: buffers, arrowType: ArrowType.ArrowFloat, nullCount: nullCount)
case .double:
return makeFixedHolder(Double.self, buffers: buffers, arrowType: ArrowType.ArrowDouble, nullCount: nullCount)
default:
return .failure(.unknownType("Float precision \(floatType.precision) currently not supported"))
}
}

private func makeDateHolder(_ dateType: org_apache_arrow_flatbuf_Date,
private func makeDateHolder(_ field: ArrowField,
buffers: [ArrowBuffer],
nullCount: UInt
) -> Result<ArrowArrayHolder, ArrowError> {
do {
if dateType.unit == .day {
let arrowData = try ArrowData(ArrowType(ArrowType.ArrowString), buffers: buffers,
nullCount: nullCount, stride: MemoryLayout<Date>.stride)
if field.type.id == .date32 {
let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount)
return .success(ArrowArrayHolder(Date32Array(arrowData)))
}

let arrowData = try ArrowData(ArrowType(ArrowType.ArrowString), buffers: buffers,
nullCount: nullCount, stride: MemoryLayout<Date>.stride)
let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount)
return .success(ArrowArrayHolder(Date64Array(arrowData)))
} catch let error as ArrowError {
return .failure(error)
Expand All @@ -79,22 +63,26 @@ private func makeDateHolder(_ dateType: org_apache_arrow_flatbuf_Date,
}
}

private func makeTimeHolder(_ timeType: org_apache_arrow_flatbuf_Time,
private func makeTimeHolder(_ field: ArrowField,
buffers: [ArrowBuffer],
nullCount: UInt
) -> Result<ArrowArrayHolder, ArrowError> {
do {
if timeType.unit == .second || timeType.unit == .millisecond {
let arrowUnit: ArrowTime32Unit = timeType.unit == .second ? .seconds : .milliseconds
let arrowData = try ArrowData(ArrowTypeTime32(arrowUnit), buffers: buffers,
nullCount: nullCount, stride: MemoryLayout<Time32>.stride)
return .success(ArrowArrayHolder(FixedArray<Time32>(arrowData)))
if field.type.id == .time32 {
if let arrowType = field.type as? ArrowTypeTime32 {
let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
return .success(ArrowArrayHolder(FixedArray<Time32>(arrowData)))
} else {
return .failure(.invalid("Incorrect field type for time: \(field.type)"))
}
}

let arrowUnit: ArrowTime64Unit = timeType.unit == .microsecond ? .microseconds : .nanoseconds
let arrowData = try ArrowData(ArrowTypeTime64(arrowUnit), buffers: buffers,
nullCount: nullCount, stride: MemoryLayout<Time64>.stride)
return .success(ArrowArrayHolder(FixedArray<Time64>(arrowData)))
if let arrowType = field.type as? ArrowTypeTime64 {
let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
return .success(ArrowArrayHolder(FixedArray<Time64>(arrowData)))
} else {
return .failure(.invalid("Incorrect field type for time: \(field.type)"))
}
} catch let error as ArrowError {
return .failure(error)
} catch {
Expand All @@ -105,8 +93,8 @@ private func makeTimeHolder(_ timeType: org_apache_arrow_flatbuf_Time,
private func makeBoolHolder(_ buffers: [ArrowBuffer],
nullCount: UInt) -> Result<ArrowArrayHolder, ArrowError> {
do {
let arrowData = try ArrowData(ArrowType(ArrowType.ArrowBool), buffers: buffers,
nullCount: nullCount, stride: MemoryLayout<UInt8>.stride)
let arrowType = ArrowType(ArrowType.ArrowBool)
let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
return .success(ArrowArrayHolder(BoolArray(arrowData)))
} catch let error as ArrowError {
return .failure(error)
Expand All @@ -116,13 +104,11 @@ private func makeBoolHolder(_ buffers: [ArrowBuffer],
}

private func makeFixedHolder<T>(
_: T.Type, buffers: [ArrowBuffer],
arrowType: ArrowType.Info,
_: T.Type, field: ArrowField, buffers: [ArrowBuffer],
nullCount: UInt
) -> Result<ArrowArrayHolder, ArrowError> {
do {
let arrowData = try ArrowData(ArrowType(arrowType), buffers: buffers,
nullCount: nullCount, stride: MemoryLayout<T>.stride)
let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount)
return .success(ArrowArrayHolder(FixedArray<T>(arrowData)))
} catch let error as ArrowError {
return .failure(error)
Expand All @@ -131,67 +117,56 @@ private func makeFixedHolder<T>(
}
}

func makeArrayHolder( // swiftlint:disable:this cyclomatic_complexity function_body_length
func makeArrayHolder(
_ field: org_apache_arrow_flatbuf_Field,
buffers: [ArrowBuffer],
nullCount: UInt
) -> Result<ArrowArrayHolder, ArrowError> {
let type = field.typeType
switch type {
case .int:
let intType = field.type(type: org_apache_arrow_flatbuf_Int.self)!
let bitWidth = intType.bitWidth
if bitWidth == 8 {
if intType.isSigned {
return makeFixedHolder(Int8.self, buffers: buffers,
arrowType: ArrowType.ArrowInt8, nullCount: nullCount)
} else {
return makeFixedHolder(UInt8.self, buffers: buffers,
arrowType: ArrowType.ArrowUInt8, nullCount: nullCount)
}
} else if bitWidth == 16 {
if intType.isSigned {
return makeFixedHolder(Int16.self, buffers: buffers,
arrowType: ArrowType.ArrowInt16, nullCount: nullCount)
} else {
return makeFixedHolder(UInt16.self, buffers: buffers,
arrowType: ArrowType.ArrowUInt16, nullCount: nullCount)
}
} else if bitWidth == 32 {
if intType.isSigned {
return makeFixedHolder(Int32.self, buffers: buffers,
arrowType: ArrowType.ArrowInt32, nullCount: nullCount)
} else {
return makeFixedHolder(UInt32.self, buffers: buffers,
arrowType: ArrowType.ArrowUInt32, nullCount: nullCount)
}
} else if bitWidth == 64 {
if intType.isSigned {
return makeFixedHolder(Int64.self, buffers: buffers,
arrowType: ArrowType.ArrowInt64, nullCount: nullCount)
} else {
return makeFixedHolder(UInt64.self, buffers: buffers,
arrowType: ArrowType.ArrowUInt64, nullCount: nullCount)
}
}
return .failure(.unknownType("Int width \(bitWidth) currently not supported"))
case .bool:
let arrowField = fromProto(field: field)
return makeArrayHolder(arrowField, buffers: buffers, nullCount: nullCount)
}

/// Builds an `ArrowArrayHolder` for `field` from its buffers.
///
/// Dispatches on `field.type.id` to the matching holder factory
/// (`makeFixedHolder`, `makeBoolHolder`, `makeStringHolder`, `makeBinaryHolder`,
/// `makeDateHolder` or `makeTimeHolder`).
///
/// - Parameters:
///   - field: Arrow field whose type id selects the kind of array to build.
///   - buffers: Buffers passed through to the selected factory.
///   - nullCount: Null count passed through to the selected factory.
/// - Returns: The result of the selected factory, or `.failure(.unknownType)`
///   when the type id has no case in the switch below.
func makeArrayHolder( // swiftlint:disable:this cyclomatic_complexity
    _ field: ArrowField,
    buffers: [ArrowBuffer],
    nullCount: UInt
) -> Result<ArrowArrayHolder, ArrowError> {
    let typeId = field.type.id
    switch typeId {
    case .int8:
        return makeFixedHolder(Int8.self, field: field, buffers: buffers, nullCount: nullCount)
    case .uint8:
        return makeFixedHolder(UInt8.self, field: field, buffers: buffers, nullCount: nullCount)
    case .int16:
        return makeFixedHolder(Int16.self, field: field, buffers: buffers, nullCount: nullCount)
    case .uint16:
        return makeFixedHolder(UInt16.self, field: field, buffers: buffers, nullCount: nullCount)
    case .int32:
        return makeFixedHolder(Int32.self, field: field, buffers: buffers, nullCount: nullCount)
    case .uint32:
        return makeFixedHolder(UInt32.self, field: field, buffers: buffers, nullCount: nullCount)
    case .int64:
        return makeFixedHolder(Int64.self, field: field, buffers: buffers, nullCount: nullCount)
    case .uint64:
        return makeFixedHolder(UInt64.self, field: field, buffers: buffers, nullCount: nullCount)
    case .boolean:
        return makeBoolHolder(buffers, nullCount: nullCount)
    case .float:
        return makeFixedHolder(Float.self, field: field, buffers: buffers, nullCount: nullCount)
    case .double:
        return makeFixedHolder(Double.self, field: field, buffers: buffers, nullCount: nullCount)
    case .string:
        return makeStringHolder(buffers, nullCount: nullCount)
    case .binary:
        return makeBinaryHolder(buffers, nullCount: nullCount)
    case .date32, .date64:
        // makeDateHolder builds a Date32Array for date32 and a Date64Array
        // otherwise, so date64 has to be routed here as well; without it,
        // date64 fields fell through to `default` and were rejected.
        return makeDateHolder(field, buffers: buffers, nullCount: nullCount)
    case .time32, .time64:
        // makeTimeHolder distinguishes the two widths itself.
        return makeTimeHolder(field, buffers: buffers, nullCount: nullCount)
    default:
        return .failure(.unknownType("Type \(typeId) currently not supported"))
    }
}

Expand Down
44 changes: 44 additions & 0 deletions swift/Arrow/Sources/Arrow/ArrowType.swift
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import Foundation

public typealias Time32 = Int32
public typealias Time64 = Int64
public typealias Date32 = Int32
public typealias Date64 = Int64

/// Empty function with no parameters and no return value.
/// NOTE(review): presumably a version marker that generated FlatBuffers code
/// references to pin the runtime to 23.1.4 — confirm against the generated sources.
func FlatBuffersVersion_23_1_4() { // swiftlint:disable:this identifier_name
}
Expand Down Expand Up @@ -165,6 +167,48 @@ public class ArrowType {
return ArrowType.ArrowUnknown
}
}

/// Returns the stride, in bytes, of a single element of this type.
///
/// The value is `MemoryLayout<T>.stride` of the Swift type associated with
/// the type id; binary and string report the stride of `Int8`.
/// Type ids without an entry below trap via `fatalError`.
///
/// - Returns: The element stride for this type's id.
public func getStride( // swiftlint:disable:this cyclomatic_complexity
) -> Int {
    let elementStride: Int
    switch self.id {
    // Signed integers
    case .int8: elementStride = MemoryLayout<Int8>.stride
    case .int16: elementStride = MemoryLayout<Int16>.stride
    case .int32: elementStride = MemoryLayout<Int32>.stride
    case .int64: elementStride = MemoryLayout<Int64>.stride
    // Unsigned integers
    case .uint8: elementStride = MemoryLayout<UInt8>.stride
    case .uint16: elementStride = MemoryLayout<UInt16>.stride
    case .uint32: elementStride = MemoryLayout<UInt32>.stride
    case .uint64: elementStride = MemoryLayout<UInt64>.stride
    // Floating point and boolean
    case .float: elementStride = MemoryLayout<Float>.stride
    case .double: elementStride = MemoryLayout<Double>.stride
    case .boolean: elementStride = MemoryLayout<Bool>.stride
    // Dates and times
    case .date32: elementStride = MemoryLayout<Date32>.stride
    case .date64: elementStride = MemoryLayout<Date64>.stride
    case .time32: elementStride = MemoryLayout<Time32>.stride
    case .time64: elementStride = MemoryLayout<Time64>.stride
    // Binary and string
    case .binary: elementStride = MemoryLayout<Int8>.stride
    case .string: elementStride = MemoryLayout<Int8>.stride
    default:
        fatalError("Stride requested for unknown type: \(self)")
    }
    return elementStride
}
}

extension ArrowType.Info: Equatable {
Expand Down
Loading

0 comments on commit 21ffd82

Please sign in to comment.