Skip to content

Commit

Permalink
[CHORE] Update PyO3 and use their new Bound API (#2793)
Browse files Browse the repository at this point in the history
We cannot yet update to the latest minor version, v0.22, because
rust-numpy only supports v0.21 right now.

There may be some small additional memory/performance optimizations we
can do with this new API, but I will leave it for another PR. This one
should not have any regressions at least.

Maybe next time i'll try using Cursor to do this 😄
  • Loading branch information
kevinzwang authored Sep 10, 2024
1 parent 3e2d25b commit 4582192
Show file tree
Hide file tree
Showing 56 changed files with 608 additions and 590 deletions.
81 changes: 46 additions & 35 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,11 @@ features = ['async']
path = "src/parquet2"

[workspace.dependencies.pyo3]
features = ["extension-module", "multiple-pymethods", "abi3-py38"]
version = "0.19.2"
features = ["extension-module", "multiple-pymethods", "abi3-py38", "indexmap"]
version = "0.21.0"

[workspace.dependencies.pyo3-log]
version = "0.8.3"
version = "0.11.0"

[workspace.dependencies.serde]
features = ["derive", "rc"]
Expand Down
32 changes: 18 additions & 14 deletions src/common/arrow-ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use pyo3::prelude::*;
pub type ArrayRef = Box<dyn Array>;

#[cfg(feature = "python")]
pub fn array_to_rust(arrow_array: &PyAny) -> PyResult<ArrayRef> {
pub fn array_to_rust(py: Python, arrow_array: Bound<PyAny>) -> PyResult<ArrayRef> {
// prepare a pointer to receive the Array struct
let array = Box::new(ffi::ArrowArray::empty());
let schema = Box::new(ffi::ArrowSchema::empty());
Expand All @@ -21,7 +21,7 @@ pub fn array_to_rust(arrow_array: &PyAny) -> PyResult<ArrayRef> {
// make the conversion through PyArrow's private API
// this changes the pointer's memory and is thus unsafe. In particular, `_export_to_c` can go out of bounds
arrow_array.call_method1(
pyo3::intern!(arrow_array.py(), "_export_to_c"),
pyo3::intern!(py, "_export_to_c"),
(array_ptr as Py_uintptr_t, schema_ptr as Py_uintptr_t),
)?;

Expand All @@ -31,8 +31,13 @@ pub fn array_to_rust(arrow_array: &PyAny) -> PyResult<ArrayRef> {
Ok(array)
}
}

#[cfg(feature = "python")]
pub fn to_py_array(array: ArrayRef, py: Python, pyarrow: &PyModule) -> PyResult<PyObject> {
pub fn to_py_array<'py>(
py: Python<'py>,
array: ArrayRef,
pyarrow: &Bound<'py, PyModule>,
) -> PyResult<Bound<'py, PyAny>> {
let schema = Box::new(ffi::export_field_to_c(&Field::new(
"",
array.data_type().clone(),
Expand All @@ -49,18 +54,18 @@ pub fn to_py_array(array: ArrayRef, py: Python, pyarrow: &PyModule) -> PyResult<
(array_ptr as Py_uintptr_t, schema_ptr as Py_uintptr_t),
)?;

let array = PyModule::import(py, pyo3::intern!(py, "daft.arrow_utils"))?
let array = PyModule::import_bound(py, pyo3::intern!(py, "daft.arrow_utils"))?
.getattr(pyo3::intern!(py, "remove_empty_struct_placeholders"))?
.call1((array,))?;

Ok(array.to_object(py))
Ok(array)
}

#[cfg(feature = "python")]
pub fn field_to_py(
field: &arrow2::datatypes::Field,
py: Python,
pyarrow: &PyModule,
field: &arrow2::datatypes::Field,
pyarrow: &Bound<PyModule>,
) -> PyResult<PyObject> {
let schema = Box::new(ffi::export_field_to_c(field));
let schema_ptr: *const ffi::ArrowSchema = &*schema;
Expand All @@ -70,25 +75,24 @@ pub fn field_to_py(
(schema_ptr as Py_uintptr_t,),
)?;

Ok(field.to_object(py))
Ok(field.into())
}

#[cfg(feature = "python")]
pub fn dtype_to_py(
pub fn dtype_to_py<'py>(
py: Python<'py>,
dtype: &arrow2::datatypes::DataType,
py: Python,
pyarrow: &PyModule,
) -> PyResult<PyObject> {
pyarrow: Bound<'py, PyModule>,
) -> PyResult<Bound<'py, PyAny>> {
let schema = Box::new(ffi::export_field_to_c(&Field::new("", dtype.clone(), true)));
let schema_ptr: *const ffi::ArrowSchema = &*schema;

let field = pyarrow.getattr(pyo3::intern!(py, "Field"))?.call_method1(
pyo3::intern!(py, "_import_from_c"),
(schema_ptr as Py_uintptr_t,),
)?;
let dtype = field.getattr(pyo3::intern!(py, "type"))?.to_object(py);

Ok(dtype.to_object(py))
field.getattr(pyo3::intern!(py, "type"))
}

fn fix_child_array_slice_offsets(array: ArrayRef) -> ArrayRef {
Expand Down
2 changes: 1 addition & 1 deletion src/common/daft-config/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[dependencies]
bincode = {workspace = true}
common-io-config = {path = "../io-config", default-features = false}
common-py-serde = {path = "../py-serde", default-features = false}
pyo3 = {workspace = true, optional = true}
serde = {workspace = true}

Expand Down
2 changes: 1 addition & 1 deletion src/common/daft-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ pub use python::PyDaftPlanningConfig;
use pyo3::prelude::*;

#[cfg(feature = "python")]
pub fn register_modules(_py: Python, parent: &PyModule) -> PyResult<()> {
pub fn register_modules(parent: &Bound<PyModule>) -> PyResult<()> {
parent.add_class::<python::PyDaftExecutionConfig>()?;
parent.add_class::<python::PyDaftPlanningConfig>()?;

Expand Down
Loading

0 comments on commit 4582192

Please sign in to comment.