Skip to content

Commit

Permalink
GH-37938: [Swift] Add initial C data interface implementation (#41342)
Browse files Browse the repository at this point in the history
Continuation for PR: #39091

This adds an initial implementation of the C Data Interface for Swift. During development it was found that the null count was not being properly maintained on the Arrow buffers; a fix for that is included as well. Some minor refactoring of existing sources was also done to enable this feature.

This has been tested with Swift calling into C to import data, but not yet with Swift exporting data to C. Testing is currently ongoing.
* GitHub Issue: #37938

Authored-by: Alva Bandy <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
abandy authored May 29, 2024
1 parent 13630c7 commit 4d524eb
Show file tree
Hide file tree
Showing 28 changed files with 1,231 additions and 55 deletions.
2 changes: 1 addition & 1 deletion ci/docker/ubuntu-swift.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

FROM swift:5.7.3
FROM swift:5.9.0

# Go is needed for generating test data
RUN apt-get update -y -q && \
Expand Down
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,4 @@ r/tools/nixlibs-allowlist.txt
ruby/red-arrow/.yardopts
.github/pull_request_template.md
swift/data-generator/swift-datagen/go.sum
swift/CDataWGo/go.sum
4 changes: 4 additions & 0 deletions swift/.swiftlint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@
# under the License.

included:
- Arrow/Package.swift
- Arrow/Sources
- Arrow/Tests
- ArrowFlight/Package.swift
- ArrowFlight/Sources
- ArrowFlight/Tests
- CDataWGo/Package.swift
- CDataWGo/Sources/go-swift
excluded:
- Arrow/Sources/Arrow/File_generated.swift
- Arrow/Sources/Arrow/Message_generated.swift
Expand Down
22 changes: 14 additions & 8 deletions swift/Arrow/Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,34 @@ let package = Package(
.macOS(.v10_14)
],
products: [
// Products define the executables and libraries a package produces, and make them visible to other packages.
.library(
name: "Arrow",
targets: ["Arrow"]),
targets: ["Arrow"])
],
dependencies: [
// The latest version of flatbuffers v23.5.26 was built in May 26, 2023
// and therefore doesn't include the unaligned buffer swift changes.
// This can be changed back to using the tag once a new version of
// flatbuffers has been released.
.package(url: "https://github.com/google/flatbuffers.git", branch: "master")
.package(url: "https://github.com/google/flatbuffers.git", branch: "master"),
.package(
url: "https://github.com/apple/swift-atomics.git",
.upToNextMajor(from: "1.2.0") // or `.upToNextMinor
)
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
// Targets can depend on other targets in this package, and on products in packages this package depends on.
.target(
name: "ArrowC",
path: "Sources/ArrowC"
),
.target(
name: "Arrow",
dependencies: [
.product(name: "FlatBuffers", package: "flatbuffers")
dependencies: ["ArrowC",
.product(name: "FlatBuffers", package: "flatbuffers"),
.product(name: "Atomics", package: "swift-atomics")
]),
.testTarget(
name: "ArrowTests",
dependencies: ["Arrow"]),
dependencies: ["Arrow", "ArrowC"])
]
)
39 changes: 25 additions & 14 deletions swift/Arrow/Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,29 @@

import Foundation

public class ArrowArrayHolder {
public protocol ArrowArrayHolder {
var type: ArrowType {get}
var length: UInt {get}
var nullCount: UInt {get}
var array: Any {get}
var data: ArrowData {get}
var getBufferData: () -> [Data] {get}
var getBufferDataSizes: () -> [Int] {get}
var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn {get}
}

public class ArrowArrayHolderImpl: ArrowArrayHolder {
public let array: Any
public let data: ArrowData
public let type: ArrowType
public let length: UInt
public let nullCount: UInt
public let array: Any
public let getBufferData: () -> [Data]
public let getBufferDataSizes: () -> [Int]
private let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn
public let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn
public init<T>(_ arrowArray: ArrowArray<T>) {
self.array = arrowArray
self.data = arrowArray.arrowData
self.length = arrowArray.length
self.type = arrowArray.arrowData.type
self.nullCount = arrowArray.nullCount
Expand Down Expand Up @@ -60,19 +73,9 @@ public class ArrowArrayHolder {
return ArrowColumn(field, chunked: ChunkedArrayHolder(try ChunkedArray<T>(arrays)))
}
}

public static func makeArrowColumn(_ field: ArrowField,
holders: [ArrowArrayHolder]
) -> Result<ArrowColumn, ArrowError> {
do {
return .success(try holders[0].getArrowColumn(field, holders))
} catch {
return .failure(.runtimeError("\(error)"))
}
}
}

public class ArrowArray<T>: AsString {
public class ArrowArray<T>: AsString, AnyArray {
public typealias ItemType = T
public let arrowData: ArrowData
public var nullCount: UInt {return self.arrowData.nullCount}
Expand Down Expand Up @@ -101,6 +104,14 @@ public class ArrowArray<T>: AsString {

return "\(self[index]!)"
}

public func asAny(_ index: UInt) -> Any? {
if self[index] == nil {
return nil
}

return self[index]!
}
}

public class FixedArray<T>: ArrowArray<T> {
Expand Down
17 changes: 14 additions & 3 deletions swift/Arrow/Sources/Arrow/ArrowBuffer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,34 @@ public class ArrowBuffer {
static let maxLength = UInt.max
fileprivate(set) var length: UInt
let capacity: UInt
let rawPointer: UnsafeMutableRawPointer
public let rawPointer: UnsafeMutableRawPointer
let isMemoryOwner: Bool

init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer) {
init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer, isMemoryOwner: Bool = true) {
self.length = length
self.capacity = capacity
self.rawPointer = rawPointer
self.isMemoryOwner = isMemoryOwner
}

deinit {
self.rawPointer.deallocate()
if isMemoryOwner {
self.rawPointer.deallocate()
}
}

func append(to data: inout Data) {
let ptr = UnsafePointer(rawPointer.assumingMemoryBound(to: UInt8.self))
data.append(ptr, count: Int(capacity))
}

static func createEmptyBuffer() -> ArrowBuffer {
return ArrowBuffer(
length: 0,
capacity: 0,
rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero))
}

static func createBuffer(_ data: [UInt8], length: UInt) -> ArrowBuffer {
let byteCount = UInt(data.count)
let capacity = alignTo64(byteCount)
Expand Down
135 changes: 135 additions & 0 deletions swift/Arrow/Sources/Arrow/ArrowCExporter.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import Foundation
import ArrowC
import Atomics

// Counter that supplies the id assigned to each exported data object.
// The memory used by UnsafeAtomic is not automatically
// reclaimed. Since this value is initialized once
// and used until the program/app is closed, its
// memory will be released on program/app exit.
let exportDataCounter: UnsafeAtomic<Int> = .create(0)

/// Exports Swift Arrow types, fields and array data through the Arrow C data
/// interface structs declared in the `ArrowC` module.
///
/// Every export creates a private `ExportData` object that owns the memory the
/// C struct points at. The object is retained in `exportedData` under an
/// integer id, the id is stored (as a pointer bit pattern) in the struct's
/// `private_data` field, and the object is dropped again when the struct's
/// `release` callback is invoked.
public class ArrowCExporter {
    /// Base class for everything that must stay alive while a C struct is
    /// exported. Takes the next id from `exportDataCounter` and registers
    /// itself in `exportedData` under that id.
    private class ExportData {
        let id: Int
        init() {
            id = exportDataCounter.loadThenWrappingIncrement(ordering: .relaxed)
            ArrowCExporter.exportedData[id] = self
        }
    }

    /// Owns the NUL-terminated format and name strings of an exported schema.
    private class ExportSchema: ExportData {
        public let arrowTypeNameCstr: UnsafePointer<CChar>
        public let nameCstr: UnsafePointer<CChar>
        private let arrowType: ArrowType
        private let name: String
        private let arrowTypeName: String

        /// - Parameters:
        ///   - arrowType: Type whose `cDataFormatId` becomes the format string.
        ///   - name: Field name to export; defaults to the empty string.
        /// - Throws: Whatever `arrowType.cDataFormatId` throws.
        init(_ arrowType: ArrowType, name: String = "") throws {
            // Resolve the format id first: it is the only throwing step, so
            // nothing has been allocated yet if it fails.
            let formatId = try arrowType.cDataFormatId
            self.arrowType = arrowType
            self.name = name
            self.arrowTypeName = formatId
            // The C strings are copied into storage owned by this object.
            // A pointer obtained from `(string as NSString).utf8String` is
            // tied to the temporary bridged NSString and is not guaranteed
            // to outlive it, so it must not be stored.
            self.nameCstr = ArrowCExporter.makeCString(name)
            self.arrowTypeNameCstr = ArrowCExporter.makeCString(formatId)
            super.init()
        }

        deinit {
            nameCstr.deallocate()
            arrowTypeNameCstr.deallocate()
        }
    }

    /// Owns the `buffers` pointer array of an exported array and keeps the
    /// backing `ArrowData` alive.
    private class ExportArray: ExportData {
        private let arrowData: ArrowData
        private(set) var data = [UnsafeRawPointer?]()
        private(set) var buffers: UnsafeMutablePointer<UnsafeRawPointer?>

        /// - Parameter arrowData: Data whose buffer pointers are exported.
        init(_ arrowData: ArrowData) {
            // keep a reference to the ArrowData
            // obj so the memory doesn't get
            // deallocated
            self.arrowData = arrowData
            var pointers = [UnsafeRawPointer?]()
            for arrowBuffer in arrowData.buffers {
                pointers.append(arrowBuffer.rawPointer)
            }
            self.data = pointers

            // Copy the pointer list into explicitly allocated storage.
            // Converting a Swift array to a pointer in an initializer call
            // yields a pointer that is only valid for that call, so it
            // cannot be stored and handed to C.
            let storage = UnsafeMutablePointer<UnsafeRawPointer?>.allocate(
                capacity: pointers.count)
            storage.initialize(from: pointers, count: pointers.count)
            self.buffers = storage
            super.init()
        }

        deinit {
            buffers.deinitialize(count: data.count)
            buffers.deallocate()
        }
    }

    // Live exports keyed by ExportData.id.
    // NOTE(review): no synchronization is visible around this dictionary;
    // confirm whether exports/releases can happen on multiple threads.
    private static var exportedData = [Int: ExportData]()
    public init() {}

    /// Copies `string` into newly allocated, NUL-terminated storage.
    /// The caller owns the result and must call `deallocate()` on it.
    private static func makeCString(_ string: String) -> UnsafePointer<CChar> {
        let bytes = string.utf8CString
        let storage = UnsafeMutablePointer<CChar>.allocate(capacity: bytes.count)
        bytes.withUnsafeBufferPointer { source in
            if let base = source.baseAddress {
                storage.initialize(from: base, count: source.count)
            }
        }
        return UnsafePointer(storage)
    }

    /// Fills `cSchema` with the format string, name, private data and release
    /// callback for `arrowType`.
    ///
    /// - Parameters:
    ///   - cSchema: C schema struct to populate.
    ///   - arrowType: Type to export.
    ///   - name: Name to export with the type; defaults to the empty string.
    /// - Returns: `.success(true)` on success, or `.failure(.unknownError)`
    ///   carrying the description of the error thrown while building the
    ///   export state.
    public func exportType(_ cSchema: inout ArrowC.ArrowSchema, arrowType: ArrowType, name: String = "") ->
        Result<Bool, ArrowError> {
        do {
            let exportSchema = try ExportSchema(arrowType, name: name)
            cSchema.format = exportSchema.arrowTypeNameCstr
            cSchema.name = exportSchema.nameCstr
            // The id travels through private_data as a pointer bit pattern
            // (id 0 is represented as a nil pointer and reads back as 0).
            cSchema.private_data = UnsafeMutableRawPointer(bitPattern: exportSchema.id)
            cSchema.release = {(data: UnsafeMutablePointer<ArrowC.ArrowSchema>?) in
                guard let data = data else {
                    fatalError("Schema release callback invoked with a nil schema")
                }

                let exportId = Int(bitPattern: data.pointee.private_data)
                // Removing the entry drops the last reference to the
                // ExportSchema object, which frees the C strings for the
                // format and name.
                guard ArrowCExporter.exportedData.removeValue(forKey: exportId) != nil else {
                    fatalError("Export schema not found with id \(exportId)")
                }

                ArrowC.ArrowSwiftClearReleaseSchema(data)
            }
        } catch {
            return .failure(.unknownError("\(error)"))
        }
        return .success(true)
    }

    /// Exports `field` by forwarding its type and name to `exportType`.
    ///
    /// - Parameters:
    ///   - schema: C schema struct to populate.
    ///   - field: Field whose `type` and `name` are exported.
    /// - Returns: The result of `exportType`.
    public func exportField(_ schema: inout ArrowC.ArrowSchema, field: ArrowField) ->
        Result<Bool, ArrowError> {
        return exportType(&schema, arrowType: field.type, name: field.name)
    }

    /// Fills `cArray` with the buffers, length, null count, private data and
    /// release callback for `arrowData`.
    ///
    /// - Parameters:
    ///   - cArray: C array struct to populate.
    ///   - arrowData: Data to export; it is retained until `release` is called.
    public func exportArray(_ cArray: inout ArrowC.ArrowArray, arrowData: ArrowData) {
        let exportArray = ExportArray(arrowData)
        cArray.buffers = exportArray.buffers
        cArray.length = Int64(arrowData.length)
        cArray.null_count = Int64(arrowData.nullCount)
        cArray.n_buffers = Int64(arrowData.buffers.count)
        // Swift Arrow does not currently support children or dictionaries
        // This will need to be updated once support has been added
        cArray.n_children = 0
        cArray.children = nil
        cArray.dictionary = nil
        // The id travels through private_data as a pointer bit pattern
        // (id 0 is represented as a nil pointer and reads back as 0).
        cArray.private_data = UnsafeMutableRawPointer(bitPattern: exportArray.id)
        cArray.release = {(data: UnsafeMutablePointer<ArrowC.ArrowArray>?) in
            guard let data = data else {
                fatalError("Array release callback invoked with a nil array")
            }

            let exportId = Int(bitPattern: data.pointee.private_data)
            // Removing the entry drops the last reference to the
            // ExportArray object, which releases its ArrowData reference
            // and frees the buffers pointer array.
            guard ArrowCExporter.exportedData.removeValue(forKey: exportId) != nil else {
                fatalError("Export data not found with id \(exportId)")
            }

            ArrowC.ArrowSwiftClearReleaseArray(data)
        }
    }
}
Loading

0 comments on commit 4d524eb

Please sign in to comment.