Skip to content

Commit

Permalink
Make FFI support optional, change APIs to be safe (#2302) (#2303)
Browse files Browse the repository at this point in the history
* Make FFI support optional (#2302)

* Fix integration test
  • Loading branch information
tustvold authored Aug 4, 2022
1 parent 4b15b7e commit d87f6a4
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 104 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/arrow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ jobs:
- name: Test
run: |
cargo test -p arrow
- name: Test --features=force_validate,prettyprint
- name: Test --features=force_validate,prettyprint,ffi
run: |
cargo test -p arrow --features=force_validate,prettyprint
cargo test -p arrow --features=force_validate,prettyprint,ffi
- name: Run examples
run: |
# Test arrow examples
Expand Down Expand Up @@ -153,8 +153,8 @@ jobs:
- name: Build
run: |
cd arrow
cargo build --no-default-features --features=csv,ipc,simd --target wasm32-unknown-unknown
cargo build --no-default-features --features=csv,ipc,simd --target wasm32-wasi
cargo build --no-default-features --features=csv,ipc,simd,ffi --target wasm32-unknown-unknown
cargo build --no-default-features --features=csv,ipc,simd,ffi --target wasm32-wasi
clippy:
name: Clippy
Expand All @@ -172,4 +172,4 @@ jobs:
rustup component add clippy
- name: Run clippy
run: |
cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils --all-targets -- -D warnings
cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi --all-targets -- -D warnings
4 changes: 3 additions & 1 deletion arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,13 @@ prettyprint = ["comfy-table"]
# an optional dependency for supporting compile to wasm32-unknown-unknown
# target without assuming an environment containing JavaScript.
test_utils = ["rand"]
pyarrow = ["pyo3"]
pyarrow = ["pyo3", "ffi"]
# force_validate runs full data validation for all arrays that are created
# this is not enabled by default as it is too computationally expensive
# but is run as part of our CI checks
force_validate = []
# Enable ffi support
ffi = []

[dev-dependencies]
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
Expand Down
63 changes: 1 addition & 62 deletions arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,13 @@
// under the License.

use std::any::Any;
use std::convert::{From, TryFrom};
use std::convert::From;
use std::fmt;
use std::sync::Arc;

use super::*;
use crate::array::equal_json::JsonEqual;
use crate::buffer::{Buffer, MutableBuffer};
use crate::error::Result;
use crate::ffi;

/// Trait for dealing with different types of array at runtime when the type of the
/// array is not known in advance.
Expand Down Expand Up @@ -216,15 +214,6 @@ pub trait Array: fmt::Debug + Send + Sync + JsonEqual {
self.data_ref().get_array_memory_size() + std::mem::size_of_val(self)
- std::mem::size_of::<ArrayData>()
}

/// returns two pointers that represent this array in the C Data Interface (FFI)
fn to_raw(
&self,
) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> {
let data = self.data().clone();
let array = ffi::ArrowArray::try_from(data)?;
Ok(ffi::ArrowArray::into_raw(array))
}
}

/// A reference-counted reference to a generic `Array`.
Expand Down Expand Up @@ -287,14 +276,6 @@ impl Array for ArrayRef {
fn get_array_memory_size(&self) -> usize {
self.as_ref().get_array_memory_size()
}

fn to_raw(
&self,
) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> {
let data = self.data().clone();
let array = ffi::ArrowArray::try_from(data)?;
Ok(ffi::ArrowArray::into_raw(array))
}
}

impl<'a, T: Array> Array for &'a T {
Expand Down Expand Up @@ -353,12 +334,6 @@ impl<'a, T: Array> Array for &'a T {
fn get_array_memory_size(&self) -> usize {
T::get_array_memory_size(self)
}

fn to_raw(
&self,
) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> {
T::to_raw(self)
}
}

/// A generic trait for accessing the values of an [`Array`]
Expand Down Expand Up @@ -733,42 +708,6 @@ fn new_null_sized_decimal(
})
}

/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface
/// # Safety
/// Assumes that these pointers represent valid C Data Interfaces, both in memory
/// representation and lifetime via the `release` mechanism.
pub unsafe fn make_array_from_raw(
array: *const ffi::FFI_ArrowArray,
schema: *const ffi::FFI_ArrowSchema,
) -> Result<ArrayRef> {
let array = ffi::ArrowArray::try_from_raw(array, schema)?;
let data = ArrayData::try_from(array)?;
Ok(make_array(data))
}

/// Exports an array to raw pointers of the C Data Interface provided by the consumer.
/// # Safety
/// Assumes that these pointers represent valid C Data Interfaces, both in memory
/// representation and lifetime via the `release` mechanism.
///
/// This function copies the content of two FFI structs [ffi::FFI_ArrowArray] and
/// [ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers.
/// Usually the raw pointers are provided by the array data consumer.
pub unsafe fn export_array_into_raw(
src: ArrayRef,
out_array: *mut ffi::FFI_ArrowArray,
out_schema: *mut ffi::FFI_ArrowSchema,
) -> Result<()> {
let data = src.data();
let array = ffi::FFI_ArrowArray::new(data);
let schema = ffi::FFI_ArrowSchema::try_from(data.data_type())?;

std::ptr::write_unaligned(out_array, array);
std::ptr::write_unaligned(out_schema, schema);

Ok(())
}

// Helper function for printing potentially long arrays.
pub(super) fn print_long_array<A, F>(
array: &A,
Expand Down
40 changes: 38 additions & 2 deletions arrow/src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::{
ffi::ArrowArrayRef,
};

use super::ArrayData;
use super::{make_array, ArrayData, ArrayRef};

impl TryFrom<ffi::ArrowArray> for ArrayData {
type Error = ArrowError;
Expand All @@ -39,10 +39,46 @@ impl TryFrom<ArrayData> for ffi::ArrowArray {
type Error = ArrowError;

fn try_from(value: ArrayData) -> Result<Self> {
unsafe { ffi::ArrowArray::try_new(value) }
ffi::ArrowArray::try_new(value)
}
}

/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface
/// # Safety
/// Assumes that these pointers represent valid C Data Interfaces, both in memory
/// representation and lifetime via the `release` mechanism.
pub unsafe fn make_array_from_raw(
array: *const ffi::FFI_ArrowArray,
schema: *const ffi::FFI_ArrowSchema,
) -> Result<ArrayRef> {
let array = ffi::ArrowArray::try_from_raw(array, schema)?;
let data = ArrayData::try_from(array)?;
Ok(make_array(data))
}

/// Exports an array to raw pointers of the C Data Interface provided by the consumer.
/// # Safety
/// Assumes that these pointers represent valid C Data Interfaces, both in memory
/// representation and lifetime via the `release` mechanism.
///
/// This function copies the content of two FFI structs [ffi::FFI_ArrowArray] and
/// [ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers.
/// Usually the raw pointers are provided by the array data consumer.
pub unsafe fn export_array_into_raw(
src: ArrayRef,
out_array: *mut ffi::FFI_ArrowArray,
out_schema: *mut ffi::FFI_ArrowSchema,
) -> Result<()> {
let data = src.data();
let array = ffi::FFI_ArrowArray::new(data);
let schema = ffi::FFI_ArrowSchema::try_from(data.data_type())?;

std::ptr::write_unaligned(out_array, array);
std::ptr::write_unaligned(out_schema, schema);

Ok(())
}

#[cfg(test)]
mod tests {
use crate::array::{DictionaryArray, FixedSizeListArray, Int32Array, StringArray};
Expand Down
4 changes: 3 additions & 1 deletion arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ mod cast;
mod data;
mod equal;
mod equal_json;
#[cfg(feature = "ffi")]
mod ffi;
mod iterator;
mod null;
Expand Down Expand Up @@ -615,7 +616,8 @@ pub use self::cast::{

// ------------------------------ C Data Interface ---------------------------

pub use self::array::{export_array_into_raw, make_array_from_raw};
#[cfg(feature = "ffi")]
pub use self::ffi::{export_array_into_raw, make_array_from_raw};

#[cfg(test)]
mod tests {
Expand Down
25 changes: 0 additions & 25 deletions arrow/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ use std::sync::Arc;
use std::{convert::AsRef, usize};

use crate::alloc::{Allocation, Deallocation};
use crate::ffi::FFI_ArrowArray;
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
use crate::{bytes::Bytes, datatypes::ArrowNativeType};

Expand Down Expand Up @@ -77,30 +76,6 @@ impl Buffer {
Buffer::build_with_arguments(ptr, len, Deallocation::Arrow(capacity))
}

/// Creates a buffer from an existing memory region (must already be byte-aligned), this
/// `Buffer` **does not** free this piece of memory when dropped.
///
/// # Arguments
///
/// * `ptr` - Pointer to raw parts
/// * `len` - Length of raw parts in **bytes**
/// * `data` - An [crate::ffi::FFI_ArrowArray] with the data
///
/// # Safety
///
/// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
/// bytes and that the foreign deallocator frees the region.
#[deprecated(
note = "use from_custom_allocation instead which makes it clearer that the allocation is in fact owned"
)]
pub unsafe fn from_unowned(
ptr: NonNull<u8>,
len: usize,
data: Arc<FFI_ArrowArray>,
) -> Self {
Self::from_custom_allocation(ptr, len, data)
}

/// Creates a buffer from an existing memory region. Ownership of the memory is tracked via reference counting
/// and the memory will be freed using the `drop` method of [crate::alloc::Allocation] when the reference count reaches zero.
///
Expand Down
4 changes: 3 additions & 1 deletion arrow/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ pub use types::*;
mod datatype;
pub use datatype::*;
mod delta;
mod ffi;

#[cfg(feature = "ffi")]
mod ffi;
#[cfg(feature = "ffi")]
pub use ffi::*;

/// A reference-counted reference to a [`Schema`](crate::datatypes::Schema).
Expand Down
17 changes: 10 additions & 7 deletions arrow/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,16 @@
//! # use arrow::array::{Int32Array, Array, ArrayData, export_array_into_raw, make_array, make_array_from_raw};
//! # use arrow::error::{Result, ArrowError};
//! # use arrow::compute::kernels::arithmetic;
//! # use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema};
//! # use std::convert::TryFrom;
//! # fn main() -> Result<()> {
//! // create an array natively
//! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
//!
//! // export it
//! let (array_ptr, schema_ptr) = array.to_raw()?;
//!
//! let ffi_array = ArrowArray::try_new(array.data().clone())?;
//! let (array_ptr, schema_ptr) = ArrowArray::into_raw(ffi_array);
//!
//! // consumed and used by something else...
//!
Expand Down Expand Up @@ -456,7 +458,7 @@ struct ArrayPrivateData {

impl FFI_ArrowArray {
/// creates a new `FFI_ArrowArray` from existing data.
/// # Safety
/// # Memory Leaks
/// This method releases `buffers`. Consumers of this struct *must* call `release` before
/// releasing this struct, or contents in `buffers` leak.
pub fn new(data: &ArrayData) -> Self {
Expand Down Expand Up @@ -836,10 +838,11 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> {

impl ArrowArray {
/// creates a new `ArrowArray`. This is used to export to the C Data Interface.
/// # Safety
/// See safety of [ArrowArray]
#[allow(clippy::too_many_arguments)]
pub unsafe fn try_new(data: ArrayData) -> Result<Self> {
///
/// # Memory Leaks
/// This method releases `buffers`. Consumers of this struct *must* call `release` before
/// releasing this struct, or contents in `buffers` leak.
pub fn try_new(data: ArrayData) -> Result<Self> {
let array = Arc::new(FFI_ArrowArray::new(&data));
let schema = Arc::new(FFI_ArrowSchema::try_from(data.data_type())?);
Ok(ArrowArray { array, schema })
Expand Down
2 changes: 2 additions & 0 deletions arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,9 @@ pub mod compute;
pub mod csv;
pub mod datatypes;
pub mod error;
#[cfg(feature = "ffi")]
pub mod ffi;
#[cfg(feature = "ffi")]
pub mod ffi_stream;
#[cfg(feature = "ipc")]
pub mod ipc;
Expand Down

0 comments on commit d87f6a4

Please sign in to comment.