Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-37938: [Swift] initial impl of C Data interface #39091

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/docker/ubuntu-swift.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

FROM swift:5.7.3
FROM swift:5.9.0

# Go is needed for generating test data
RUN apt-get update -y -q && \
Expand Down
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,4 @@ r/tools/nixlibs-allowlist.txt
ruby/red-arrow/.yardopts
.github/pull_request_template.md
swift/data-generator/swift-datagen/go.sum
swift/CDataWGo/go.sum
1 change: 1 addition & 0 deletions swift/.swiftlint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ included:
- Arrow/Tests
- ArrowFlight/Sources
- ArrowFlight/Tests
- CDataWGo/Sources/go-swift
excluded:
- Arrow/Sources/Arrow/File_generated.swift
- Arrow/Sources/Arrow/Message_generated.swift
Expand Down
17 changes: 13 additions & 4 deletions swift/Arrow/Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,27 @@ let package = Package(
// and therefore doesn't include the unaligned buffer swift changes.
// This can be changed back to using the tag once a new version of
// flatbuffers has been released.
.package(url: "https://github.com/google/flatbuffers.git", branch: "master")
.package(url: "https://github.com/google/flatbuffers.git", branch: "master"),
.package(
url: "https://github.com/apple/swift-atomics.git",
.upToNextMajor(from: "1.2.0") // or `.upToNextMinor
)
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
// Targets can depend on other targets in this package, and on products in packages this package depends on.
.target(
name: "ArrowC", // your C/C++ library's name
path: "Sources/ArrowC" // your path to the C/C++ library
),
.target(
name: "Arrow",
dependencies: [
.product(name: "FlatBuffers", package: "flatbuffers")
dependencies: ["ArrowC",
.product(name: "FlatBuffers", package: "flatbuffers"),
.product(name: "Atomics", package: "swift-atomics")
]),
.testTarget(
name: "ArrowTests",
dependencies: ["Arrow"]),
dependencies: ["Arrow", "ArrowC"]),
]
)
24 changes: 12 additions & 12 deletions swift/Arrow/Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,24 @@

import Foundation

public class ArrowArrayHolder {
public protocol ArrowArrayHolder {
var type: ArrowType {get}
var length: UInt {get}
var nullCount: UInt {get}
var array: Any {get}
var getBufferData: () -> [Data] {get}
var getBufferDataSizes: () -> [Int] {get}
var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn {get}
}

public class ArrowArrayHolderImpl: ArrowArrayHolder {
public let type: ArrowType
public let length: UInt
public let nullCount: UInt
public let array: Any
public let getBufferData: () -> [Data]
public let getBufferDataSizes: () -> [Int]
private let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn
public let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn
public init<T>(_ arrowArray: ArrowArray<T>) {
self.array = arrowArray
self.length = arrowArray.length
Expand Down Expand Up @@ -60,16 +70,6 @@ public class ArrowArrayHolder {
return ArrowColumn(field, chunked: ChunkedArrayHolder(try ChunkedArray<T>(arrays)))
}
}

public static func makeArrowColumn(_ field: ArrowField,
holders: [ArrowArrayHolder]
) -> Result<ArrowColumn, ArrowError> {
do {
return .success(try holders[0].getArrowColumn(field, holders))
} catch {
return .failure(.runtimeError("\(error)"))
}
}
}

public class ArrowArray<T>: AsString {
Expand Down
15 changes: 13 additions & 2 deletions swift/Arrow/Sources/Arrow/ArrowBuffer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,33 @@ public class ArrowBuffer {
fileprivate(set) var length: UInt
let capacity: UInt
let rawPointer: UnsafeMutableRawPointer
let isMemoryOwner: Bool

init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer) {
init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer, isMemoryOwner: Bool = true) {
self.length = length
self.capacity = capacity
self.rawPointer = rawPointer
self.isMemoryOwner = isMemoryOwner
}

deinit {
self.rawPointer.deallocate()
if isMemoryOwner {
self.rawPointer.deallocate()
}
}

func append(to data: inout Data) {
let ptr = UnsafePointer(rawPointer.assumingMemoryBound(to: UInt8.self))
data.append(ptr, count: Int(capacity))
}

static func createEmptyBuffer() -> ArrowBuffer {
return ArrowBuffer(
length: 0,
capacity: 0,
rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero))
}

static func createBuffer(_ data: [UInt8], length: UInt) -> ArrowBuffer {
let byteCount = UInt(data.count)
let capacity = alignTo64(byteCount)
Expand Down
134 changes: 134 additions & 0 deletions swift/Arrow/Sources/Arrow/ArrowCExporter.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import Foundation
import ArrowC
import Atomics

extension String {
var cstring: UnsafePointer<CChar> {
(self as NSString).cString(using: String.Encoding.utf8.rawValue)!
pitrou marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I was still a bit mystified by this, and it appears that using the cString method actually creates a memory leak, at least on non-Apple platforms:
swiftlang/swift-corelibs-foundation#3885

Instead, perhaps we should use NSString.maximumLengthOfBytes and NSString.getCString so as to control the lifetime of the CChar array.

}
}

// The memory used by UnsafeAtomic is not automatically
// reclaimed. Since this value is initialized once
// and used until the program/app is closed it's
// memory will be released on program/app exit
let exportDataCounter: UnsafeAtomic<Int> = .create(0)

public class ArrowCExporter {
private class ExportData {
let id: Int
init() {
id = exportDataCounter.loadThenWrappingIncrement(ordering: .relaxed)
ArrowCExporter.exportedData[id] = self
}
}

private class ExportSchema: ExportData {
public let arrowTypeName: UnsafePointer<CChar>
public let name: UnsafePointer<CChar>
private let arrowType: ArrowType
init(_ arrowType: ArrowType, name: String = "") throws {
self.arrowType = arrowType
self.arrowTypeName = try arrowType.cDataFormatId.cstring
self.name = name.cstring
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure the name string survives long enough? I see it's not caught in this class' members.

super.init()
}
}

private class ExportArray: ExportData {
private let arrowData: ArrowData
private(set) var data = [UnsafeRawPointer?]()
private(set) var buffers: UnsafeMutablePointer<UnsafeRawPointer?>
init(_ arrowData: ArrowData) {
// keep a reference to the ArrowData
// obj so the memory doesn't get
// deallocated
self.arrowData = arrowData
for arrowBuffer in arrowData.buffers {
data.append(arrowBuffer.rawPointer)
}

self.buffers = UnsafeMutablePointer(mutating: data)
super.init()
}
}

private static var exportedData = [Int: ExportData]()
public init() {}

public func exportType(_ cSchema: inout ArrowC.ArrowSchema, arrowType: ArrowType, name: String = "") ->
Result<Bool, ArrowError> {
do {
let exportSchema = try ExportSchema(arrowType, name: name)
cSchema.format = exportSchema.arrowTypeName
cSchema.name = exportSchema.name
cSchema.private_data =
UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: exportSchema.id))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Newbie question, but according to the docs, it seems UnsafeMutableRawPointer(bitPattern: exportSchema.id) should work?

cSchema.release = {(data: UnsafeMutablePointer<ArrowC.ArrowSchema>?) in
let arraySchema = data!.pointee
let exportId = Int(bitPattern: arraySchema.private_data)
guard ArrowCExporter.exportedData[exportId] != nil else {
fatalError("Export schema not found with id \(exportId)")
}

// the data associated with this exportSchema object
// which includes the C strings for the format and name
// be deallocated upon removal
ArrowCExporter.exportedData.removeValue(forKey: exportId)
ArrowC.ArrowSwiftClearReleaseSchema(data)
}
} catch {
return .failure(.unknownError("\(error)"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two questions:

  1. if we arrive here, then the exportSchema object is never removed from exportedData and therefore leaks until the end of the process, right?
  2. can we fail with a better error message?

}
return .success(true)
}

public func exportField(_ schema: inout ArrowC.ArrowSchema, field: ArrowField) ->
Result<Bool, ArrowError> {
return exportType(&schema, arrowType: field.type, name: field.name)
}

public func exportArray(_ cArray: inout ArrowC.ArrowArray, arrowData: ArrowData) {
let exportArray = ExportArray(arrowData)
cArray.buffers = exportArray.buffers
cArray.length = Int64(arrowData.length)
cArray.null_count = Int64(arrowData.nullCount)
cArray.n_buffers = Int64(arrowData.buffers.count)
cArray.n_children = 0
cArray.children = nil
cArray.dictionary = nil
cArray.private_data =
UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: exportArray.id))
cArray.release = {(data: UnsafeMutablePointer<ArrowC.ArrowArray>?) in
let arrayData = data!.pointee
let exportId = Int(bitPattern: arrayData.private_data)
guard ArrowCExporter.exportedData[exportId] != nil else {
fatalError("Export data not found with id \(exportId)")
}

// the data associated with this exportArray object
// which includes the entire arrowData object
// and the buffers UnsafeMutablePointer[] will
// be deallocated upon removal
ArrowCExporter.exportedData.removeValue(forKey: exportId)
ArrowC.ArrowSwiftClearReleaseArray(data)
}
}
}
Loading
Loading