Skip to content

Commit

Permalink
GH-37938: [Swift] initial impl of C Data interface
Browse files Browse the repository at this point in the history
  • Loading branch information
abandy committed Feb 16, 2024
1 parent a03d957 commit a8166fe
Show file tree
Hide file tree
Showing 25 changed files with 1,121 additions and 43 deletions.
2 changes: 1 addition & 1 deletion ci/docker/ubuntu-swift.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

FROM swift:5.7.3
FROM swift:5.9.0

# Go is needed for generating test data
RUN apt-get update -y -q && \
Expand Down
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,4 @@ r/tools/nixlibs-allowlist.txt
ruby/red-arrow/.yardopts
.github/pull_request_template.md
swift/data-generator/swift-datagen/go.sum
swift/CDataWGo/go.sum
1 change: 1 addition & 0 deletions swift/.swiftlint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ included:
- Arrow/Tests
- ArrowFlight/Sources
- ArrowFlight/Tests
- CDataWGo/Sources/go-swift
excluded:
- Arrow/Sources/Arrow/File_generated.swift
- Arrow/Sources/Arrow/Message_generated.swift
Expand Down
17 changes: 13 additions & 4 deletions swift/Arrow/Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,27 @@ let package = Package(
// and therefore doesn't include the unaligned buffer swift changes.
// This can be changed back to using the tag once a new version of
// flatbuffers has been released.
.package(url: "https://github.com/google/flatbuffers.git", branch: "master")
.package(url: "https://github.com/google/flatbuffers.git", branch: "master"),
.package(
url: "https://github.com/apple/swift-atomics.git",
.upToNextMajor(from: "1.2.0") // or `.upToNextMinor
)
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
// Targets can depend on other targets in this package, and on products in packages this package depends on.
.target(
name: "ArrowC", //your C/C++ library's name
path: "Sources/ArrowC" //your path to the C/C++ library
),
.target(
name: "Arrow",
dependencies: [
.product(name: "FlatBuffers", package: "flatbuffers")
dependencies: ["ArrowC",
.product(name: "FlatBuffers", package: "flatbuffers"),
.product(name: "Atomics", package: "swift-atomics")
]),
.testTarget(
name: "ArrowTests",
dependencies: ["Arrow"]),
dependencies: ["Arrow", "ArrowC"]),
]
)
24 changes: 12 additions & 12 deletions swift/Arrow/Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,24 @@

import Foundation

public class ArrowArrayHolder {
public protocol ArrowArrayHolder {
var type: ArrowType {get}
var length: UInt {get}
var nullCount: UInt {get}
var array: Any {get}
var getBufferData: () -> [Data] {get}
var getBufferDataSizes: () -> [Int] {get}
var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn {get}
}

public class ArrowArrayHolderImpl: ArrowArrayHolder {
public let type: ArrowType
public let length: UInt
public let nullCount: UInt
public let array: Any
public let getBufferData: () -> [Data]
public let getBufferDataSizes: () -> [Int]
private let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn
public let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn
public init<T>(_ arrowArray: ArrowArray<T>) {
self.array = arrowArray
self.length = arrowArray.length
Expand Down Expand Up @@ -60,16 +70,6 @@ public class ArrowArrayHolder {
return ArrowColumn(field, chunked: ChunkedArrayHolder(try ChunkedArray<T>(arrays)))
}
}

public static func makeArrowColumn(_ field: ArrowField,
holders: [ArrowArrayHolder]
) -> Result<ArrowColumn, ArrowError> {
do {
return .success(try holders[0].getArrowColumn(field, holders))
} catch {
return .failure(.runtimeError("\(error)"))
}
}
}

public class ArrowArray<T>: AsString {
Expand Down
15 changes: 13 additions & 2 deletions swift/Arrow/Sources/Arrow/ArrowBuffer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,33 @@ public class ArrowBuffer {
fileprivate(set) var length: UInt
let capacity: UInt
let rawPointer: UnsafeMutableRawPointer
let isMemoryOwner: Bool

init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer) {
init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer, isMemoryOwner: Bool = true) {
self.length = length
self.capacity = capacity
self.rawPointer = rawPointer
self.isMemoryOwner = isMemoryOwner
}

deinit {
self.rawPointer.deallocate()
if isMemoryOwner {
self.rawPointer.deallocate()
}
}

func append(to data: inout Data) {
let ptr = UnsafePointer(rawPointer.assumingMemoryBound(to: UInt8.self))
data.append(ptr, count: Int(capacity))
}

static func createEmptyBuffer() -> ArrowBuffer {
return ArrowBuffer(
length: 0,
capacity: 0,
rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero))
}

static func createBuffer(_ data: [UInt8], length: UInt) -> ArrowBuffer {
let byteCount = UInt(data.count)
let capacity = alignTo64(byteCount)
Expand Down
134 changes: 134 additions & 0 deletions swift/Arrow/Sources/Arrow/ArrowCExporter.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import Foundation
import ArrowC
import Atomics

extension String {
var cstring: UnsafePointer<CChar> {
(self as NSString).cString(using: String.Encoding.utf8.rawValue)!
}
}

// The memory used by UnsafeAtomic is not automatically
// reclaimed. Since this value is initialized once
// and used until the program/app is closed it's
// memory will be released on program/app exit
let exportDataCounter: UnsafeAtomic<Int> = .create(0)

public class ArrowCExporter {
private class ExportData {
let id: Int
init() {
id = exportDataCounter.loadThenWrappingIncrement(ordering: .relaxed)
ArrowCExporter.exportedData[id] = self
}
}

private class ExportSchema: ExportData {
public let arrowTypeName: UnsafePointer<CChar>
public let name: UnsafePointer<CChar>
private let arrowType: ArrowType
init(_ arrowType: ArrowType, name: String = "") throws {
self.arrowType = arrowType
self.arrowTypeName = try arrowType.cDataFormatId.cstring
self.name = name.cstring
super.init()
}
}

private class ExportArray: ExportData {
private let arrowData: ArrowData
private(set) var data = [UnsafeRawPointer?]()
private(set) var buffers: UnsafeMutablePointer<UnsafeRawPointer?>
init(_ arrowData: ArrowData) {
// keep a reference to the ArrowData
// obj so the memory doesn't get
// deallocated
self.arrowData = arrowData
for arrowBuffer in arrowData.buffers {
data.append(arrowBuffer.rawPointer)
}

self.buffers = UnsafeMutablePointer(mutating: data)
super.init()
}
}

private static var exportedData = [Int: ExportData]()
public init() {}

public func exportType(_ cSchema: inout ArrowC.ArrowSchema, arrowType: ArrowType, name: String = "") ->
Result<Bool, ArrowError> {
do {
let exportSchema = try ExportSchema(arrowType, name: name)
cSchema.format = exportSchema.arrowTypeName
cSchema.name = exportSchema.name
cSchema.private_data =
UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: exportSchema.id))
cSchema.release = {(data: UnsafeMutablePointer<ArrowC.ArrowSchema>?) in
let arraySchema = data!.pointee
let exportId = Int(bitPattern: arraySchema.private_data)
guard ArrowCExporter.exportedData[exportId] != nil else {
fatalError("Export schema not found with id \(exportId)")
}

// the data associated with this exportschema object
// which includes the c strings for the format and name
// be deallocated upon removal
ArrowCExporter.exportedData.removeValue(forKey: exportId)
ArrowC.ClearReleaseSchema(data)
}
} catch {
return .failure(.unknownError("\(error)"))
}
return .success(true)
}

public func exportField(_ schema: inout ArrowC.ArrowSchema, field: ArrowField) ->
Result<Bool, ArrowError> {
return exportType(&schema, arrowType: field.type, name: field.name)
}

public func exportArray(_ cArray: inout ArrowC.ArrowArray, arrowData: ArrowData) {
let exportArray = ExportArray(arrowData)
cArray.buffers = exportArray.buffers
cArray.length = Int64(arrowData.length)
cArray.null_count = Int64(arrowData.nullCount)
cArray.n_buffers = Int64(arrowData.buffers.count)
cArray.n_children = 0
cArray.children = nil
cArray.dictionary = nil
cArray.private_data =
UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: exportArray.id))
cArray.release = {(data: UnsafeMutablePointer<ArrowC.ArrowArray>?) in
let arrayData = data!.pointee
let exportId = Int(bitPattern: arrayData.private_data)
guard ArrowCExporter.exportedData[exportId] != nil else {
fatalError("Export data not found with id \(exportId)")
}

// the data associated with this exportdata object
// which includes the entire arrowdata object
// and the buffers UnsafeMutablePointer[] will
// be deallocated upon removal
ArrowCExporter.exportedData.removeValue(forKey: exportId)
ArrowC.ClearReleaseArray(data)
}
}
}
Loading

0 comments on commit a8166fe

Please sign in to comment.