-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Initial commit of FFI table provider code * Add table type * Make struct pub * Implementing supports_filters_pushdown * Move plan properties over to its own file * Adding release function * Adding release functions to additional structs * Resolve memory leaks * Rename ForeignExecutionPlan for consistency * Resolving memory leak issues * Remove debug statements. Create runtime for block_on operations * Switching over to stable abi and async-ffi * Make consistent the use of Foreign and FFI on struct names * Apply prettier * Format for linter * Add doc-comment * Add option to specify table provider does not support pushdown filters to avoid extra work for some providers * Remove setting default features in cargo file * Tokio only needed for unit tests * Provide log errors rather than failing silently on schema requests * Set default features for datafusion to false in ffi crate * Using TryFrom or From instead of implementing new when there is only one parameter * Move arrow wrappers into their own file * Add documentation * Small adjustment to documentation * Add license text * Fix unnecessary qualification * taplo format
- Loading branch information
Showing
11 changed files
with
1,784 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
[package] | ||
name = "datafusion-ffi" | ||
description = "Foreign Function Interface implementation for DataFusion" | ||
readme = "README.md" | ||
version = { workspace = true } | ||
edition = { workspace = true } | ||
homepage = { workspace = true } | ||
repository = { workspace = true } | ||
license = { workspace = true } | ||
authors = { workspace = true } | ||
# Specify MSRV here as `cargo msrv` doesn't support workspace version | ||
rust-version = "1.76" | ||
|
||
[lints] | ||
workspace = true | ||
|
||
[lib] | ||
name = "datafusion_ffi" | ||
path = "src/lib.rs" | ||
|
||
[dependencies] | ||
abi_stable = "0.11.3" | ||
arrow = { workspace = true, features = ["ffi"] } | ||
async-ffi = { version = "0.5.0", features = ["abi_stable"] } | ||
async-trait = { workspace = true } | ||
datafusion = { workspace = true, default-features = false } | ||
datafusion-proto = { workspace = true } | ||
doc-comment = { workspace = true } | ||
futures = { workspace = true } | ||
log = { workspace = true } | ||
prost = { workspace = true } | ||
|
||
[dev-dependencies] | ||
tokio = { workspace = true } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
<!--- | ||
Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
--> | ||
|
||
# `datafusion-ffi`: Apache DataFusion Foreign Function Interface | ||
|
||
This crate contains code to allow interoperability of Apache [DataFusion] | ||
with functions from other languages using a stable interface. | ||
|
||
See [API Docs] for details and examples. | ||
|
||
We expect this crate may be used by both sides of the FFI. This allows users | ||
to create modules that can interoperate without the necessity of using the same | ||
version of DataFusion. The driving use case has been the `datafusion-python` | ||
repository, but many other use cases may exist. We envision at least two | ||
use cases. | ||
|
||
1. `datafusion-python` which will use the FFI to provide external services such | ||
as a `TableProvider` without needing to re-export the entire `datafusion-python` | ||
code base. With `datafusion-ffi` these packages do not need `datafusion-python` | ||
as a dependency at all. | ||
2. Users may want to create a modular interface that allows runtime loading of | ||
libraries. | ||
|
||
## Struct Layout | ||
|
||
In this crate we have a variety of structs which closely mimic the behavior of | ||
their internal counterparts. In the following example, we will refer to the | ||
`TableProvider`, but the same pattern exists for other structs. | ||
|
||
Each of the exposed structs in this crate is provided with a variant prefixed | ||
with `Foreign`. This variant is designed to be used by the consumer of the | ||
foreign code. The `Foreign` structs should _never_ access the `private_data` | ||
fields. Instead they should only access the data returned through the function | ||
calls defined on the `FFI_` structs. The second purpose of the `Foreign` | ||
structs is to contain additional data that may be needed by the traits that | ||
are implemented on them. Some of these traits require borrowing data which | ||
can be far more convenient to store locally. | ||
|
||
For example, we have a struct `FFI_TableProvider` to give access to the | ||
`TableProvider` functions like `table_type()` and `scan()`. If we write a | ||
library that wishes to expose its `TableProvider`, then we can access the | ||
private data that contains the Arc reference to the `TableProvider` via | ||
`FFI_TableProvider`. This data is local to the library. | ||
|
||
If we have a program that accesses a `TableProvider` via FFI, then it | ||
will use `ForeignTableProvider`. When using `ForeignTableProvider` we **must** | ||
not attempt to access the `private_data` field in `FFI_TableProvider`. If you | ||
are testing locally, you may be able to successfully access this field, but | ||
it will only work if you are building against the exact same version of | ||
`DataFusion` for both libraries **and** the same compiler. It will not work | ||
in general. | ||
|
||
It is worth noting that which library is the `local` and which is `foreign` | ||
depends on which interface we are considering. For example, suppose we have a | ||
Python library called `my_provider` that exposes a `TableProvider` called | ||
`MyProvider` via `FFI_TableProvider`. Within the library `my_provider` we can | ||
access the `private_data` via `FFI_TableProvider`. We connect this to | ||
`datafusion-python`, where we access it as a `ForeignTableProvider`. Now when | ||
we call `scan()` on this interface, we have to pass it a `FFI_SessionConfig`. | ||
The `SessionConfig` is local to `datafusion-python` and **not** `my_provider`. | ||
It is important to be careful when expanding these functions to be certain which | ||
side of the interface each object refers to. | ||
|
||
[datafusion]: https://datafusion.apache.org | ||
[api docs]: http://docs.rs/datafusion-ffi/latest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
use std::sync::Arc; | ||
|
||
use abi_stable::StableAbi; | ||
use arrow::{ | ||
datatypes::{Schema, SchemaRef}, | ||
ffi::{FFI_ArrowArray, FFI_ArrowSchema}, | ||
}; | ||
use log::error; | ||
|
||
/// This is a wrapper struct around FFI_ArrowSchema simply to indicate | ||
/// to the StableAbi macros that the underlying struct is FFI safe. | ||
#[repr(C)] | ||
#[derive(Debug, StableAbi)] | ||
pub struct WrappedSchema(#[sabi(unsafe_opaque_field)] pub FFI_ArrowSchema); | ||
|
||
impl From<SchemaRef> for WrappedSchema { | ||
fn from(value: SchemaRef) -> Self { | ||
let ffi_schema = match FFI_ArrowSchema::try_from(value.as_ref()) { | ||
Ok(s) => s, | ||
Err(e) => { | ||
error!("Unable to convert DataFusion Schema to FFI_ArrowSchema in FFI_PlanProperties. {}", e); | ||
FFI_ArrowSchema::empty() | ||
} | ||
}; | ||
|
||
WrappedSchema(ffi_schema) | ||
} | ||
} | ||
|
||
impl From<WrappedSchema> for SchemaRef { | ||
fn from(value: WrappedSchema) -> Self { | ||
let schema = match Schema::try_from(&value.0) { | ||
Ok(s) => s, | ||
Err(e) => { | ||
error!("Unable to convert from FFI_ArrowSchema to DataFusion Schema in FFI_PlanProperties. {}", e); | ||
Schema::empty() | ||
} | ||
}; | ||
Arc::new(schema) | ||
} | ||
} | ||
|
||
/// This is a wrapper struct for FFI_ArrowArray to indicate to StableAbi | ||
/// that the struct is FFI Safe. For convenience, we also include the | ||
/// schema needed to create a record batch from the array. | ||
#[repr(C)] | ||
#[derive(Debug, StableAbi)] | ||
pub struct WrappedArray { | ||
#[sabi(unsafe_opaque_field)] | ||
pub array: FFI_ArrowArray, | ||
|
||
pub schema: WrappedSchema, | ||
} |
Oops, something went wrong.